{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c60b0277-1911-43a5-9972-942d80be03d9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2324.2090878679414, 1853.7660910962848, 2110.3783820697104, 1737.6735788821486, 2746.053823279685, 1421.332901389793, 4753.918291682923, 4448.325847695072, 4688.084811318067, 4084.7632528006793, 4237.786681681212, 4551.01862460752, 2477.4557106871666, 3001.6283418289677, 2625.4934815821453, 1667.8004919795744, 1996.6225299897271, 1318.4626055138983, 844.3291062612952, 1094.1446723635613, 1059.8211785082972, 927.0240548506282, 829.2321296070824, 3061.6144177378933]\n"
     ]
    }
   ],
   "source": [
    "### 1 sample\n",
    "#hessian_trace = {'model.decoder.embed_tokens.weight': 1.1185309799289643e-05, 'model.decoder.embed_positions.weight': 8.143285607419362e-06, 'model.decoder.final_layer_norm.weight': 0.002161037642508745, 'model.decoder.final_layer_norm.bias': 0.004603024572134018, 'model.decoder.layers.0.self_attn.k_proj.weight': 4.645156877813861e-05, 'model.decoder.layers.0.self_attn.k_proj.bias': 3.031572493039647e-10, 'model.decoder.layers.0.self_attn.v_proj.weight': 1.66144673130475e-05, 'model.decoder.layers.0.self_attn.v_proj.bias': 0.0019134297035634518, 'model.decoder.layers.0.self_attn.q_proj.weight': 2.0651876184274442e-05, 'model.decoder.layers.0.self_attn.q_proj.bias': 0.00010981145896948874, 'model.decoder.layers.0.self_attn.out_proj.weight': 7.775293488521129e-05, 'model.decoder.layers.0.self_attn.out_proj.bias': 0.00035202689468860626, 'model.decoder.layers.0.self_attn_layer_norm.weight': 0.0013620768440887332, 'model.decoder.layers.0.self_attn_layer_norm.bias': 0.007437482010573149, 'model.decoder.layers.0.fc1.weight': 0.00012652759323827922, 'model.decoder.layers.0.fc1.bias': 0.0024492822121828794, 'model.decoder.layers.0.fc2.weight': 2.7213727662456222e-05, 'model.decoder.layers.0.fc2.bias': 0.008369727060198784, 'model.decoder.layers.0.final_layer_norm.weight': 0.001489672577008605, 'model.decoder.layers.0.final_layer_norm.bias': 0.0009513160330243409, 'model.decoder.layers.1.self_attn.k_proj.weight': 6.936030331416987e-06, 'model.decoder.layers.1.self_attn.k_proj.bias': 1.5926332475046934e-10, 'model.decoder.layers.1.self_attn.v_proj.weight': 0.00025346881011500955, 'model.decoder.layers.1.self_attn.v_proj.bias': 0.0021952157840132713, 'model.decoder.layers.1.self_attn.q_proj.weight': 1.818699274735991e-05, 'model.decoder.layers.1.self_attn.q_proj.bias': 0.0004335007688496262, 'model.decoder.layers.1.self_attn.out_proj.weight': 1.7706663129501976e-05, 'model.decoder.layers.1.self_attn.out_proj.bias': 0.010794132947921753, 
'model.decoder.layers.1.self_attn_layer_norm.weight': 0.004621668718755245, 'model.decoder.layers.1.self_attn_layer_norm.bias': 0.006846331059932709, 'model.decoder.layers.1.fc1.weight': 5.2937732107238844e-05, 'model.decoder.layers.1.fc1.bias': 0.0014727330999448895, 'model.decoder.layers.1.fc2.weight': 7.398500201816205e-06, 'model.decoder.layers.1.fc2.bias': 0.03257054463028908, 'model.decoder.layers.1.final_layer_norm.weight': 0.0019106791587546468, 'model.decoder.layers.1.final_layer_norm.bias': 0.0022776746191084385, 'model.decoder.layers.2.self_attn.k_proj.weight': 6.018878593749832e-06, 'model.decoder.layers.2.self_attn.k_proj.bias': 2.9858293615347975e-10, 'model.decoder.layers.2.self_attn.v_proj.weight': 7.255269156303257e-05, 'model.decoder.layers.2.self_attn.v_proj.bias': 0.0014476566575467587, 'model.decoder.layers.2.self_attn.q_proj.weight': 6.528708581754472e-06, 'model.decoder.layers.2.self_attn.q_proj.bias': 0.001365584321320057, 'model.decoder.layers.2.self_attn.out_proj.weight': 1.976549538085237e-05, 'model.decoder.layers.2.self_attn.out_proj.bias': 0.009294230490922928, 'model.decoder.layers.2.self_attn_layer_norm.weight': 0.000535756116732955, 'model.decoder.layers.2.self_attn_layer_norm.bias': 0.001856120303273201, 'model.decoder.layers.2.fc1.weight': 3.177683538524434e-05, 'model.decoder.layers.2.fc1.bias': 2.412831236142665e-05, 'model.decoder.layers.2.fc2.weight': 1.818887085391907e-07, 'model.decoder.layers.2.fc2.bias': 0.022226765751838684, 'model.decoder.layers.2.final_layer_norm.weight': 0.007943467237055302, 'model.decoder.layers.2.final_layer_norm.bias': 0.0022404014598578215, 'model.decoder.layers.3.self_attn.k_proj.weight': 1.151437663793331e-05, 'model.decoder.layers.3.self_attn.k_proj.bias': 3.3940850130420586e-10, 'model.decoder.layers.3.self_attn.v_proj.weight': 4.8154910473385826e-05, 'model.decoder.layers.3.self_attn.v_proj.bias': 0.01699797995388508, 'model.decoder.layers.3.self_attn.q_proj.weight': 2.3673514078836888e-05, 
'model.decoder.layers.3.self_attn.q_proj.bias': 0.0005298053729347885, 'model.decoder.layers.3.self_attn.out_proj.weight': 2.1154875867068768e-05, 'model.decoder.layers.3.self_attn.out_proj.bias': 0.0014614267274737358, 'model.decoder.layers.3.self_attn_layer_norm.weight': 0.005598359275609255, 'model.decoder.layers.3.self_attn_layer_norm.bias': 0.000933143135625869, 'model.decoder.layers.3.fc1.weight': 4.553344115265645e-05, 'model.decoder.layers.3.fc1.bias': 0.0012870586942881346, 'model.decoder.layers.3.fc2.weight': 1.06591141957324e-05, 'model.decoder.layers.3.fc2.bias': 0.012304415926337242, 'model.decoder.layers.3.final_layer_norm.weight': 0.0007680617272853851, 'model.decoder.layers.3.final_layer_norm.bias': 0.005956999026238918, 'model.decoder.layers.4.self_attn.k_proj.weight': 2.699195647437591e-06, 'model.decoder.layers.4.self_attn.k_proj.bias': 7.5040951230676e-10, 'model.decoder.layers.4.self_attn.v_proj.weight': 0.00021728238789364696, 'model.decoder.layers.4.self_attn.v_proj.bias': 0.0034294729121029377, 'model.decoder.layers.4.self_attn.q_proj.weight': 4.4106283894507214e-05, 'model.decoder.layers.4.self_attn.q_proj.bias': 0.0004717133124358952, 'model.decoder.layers.4.self_attn.out_proj.weight': 3.1982865039026365e-05, 'model.decoder.layers.4.self_attn.out_proj.bias': 0.04290454462170601, 'model.decoder.layers.4.self_attn_layer_norm.weight': 0.0024552810937166214, 'model.decoder.layers.4.self_attn_layer_norm.bias': 0.0026056580245494843, 'model.decoder.layers.4.fc1.weight': 3.265196210122667e-05, 'model.decoder.layers.4.fc1.bias': 0.0013350797817111015, 'model.decoder.layers.4.fc2.weight': 6.125213985797018e-06, 'model.decoder.layers.4.fc2.bias': 0.0204729363322258, 'model.decoder.layers.4.final_layer_norm.weight': 0.006313554476946592, 'model.decoder.layers.4.final_layer_norm.bias': 0.0018668060656636953, 'model.decoder.layers.5.self_attn.k_proj.weight': 1.9550689103198238e-05, 'model.decoder.layers.5.self_attn.k_proj.bias': 4.5481396426794163e-10, 
'model.decoder.layers.5.self_attn.v_proj.weight': 0.0002496642409823835, 'model.decoder.layers.5.self_attn.v_proj.bias': 0.002697425428777933, 'model.decoder.layers.5.self_attn.q_proj.weight': 1.265824175789021e-05, 'model.decoder.layers.5.self_attn.q_proj.bias': 0.0002674320712685585, 'model.decoder.layers.5.self_attn.out_proj.weight': 0.00011045326391467825, 'model.decoder.layers.5.self_attn.out_proj.bias': 0.014382628723978996, 'model.decoder.layers.5.self_attn_layer_norm.weight': 0.001466980203986168, 'model.decoder.layers.5.self_attn_layer_norm.bias': 0.0034044026397168636, 'model.decoder.layers.5.fc1.weight': 5.873205282114213e-06, 'model.decoder.layers.5.fc1.bias': 8.20957066025585e-05, 'model.decoder.layers.5.fc2.weight': 4.40867825091118e-06, 'model.decoder.layers.5.fc2.bias': 0.018928296864032745, 'model.decoder.layers.5.final_layer_norm.weight': 0.0011564877349883318, 'model.decoder.layers.5.final_layer_norm.bias': 0.0016948427073657513, 'model.decoder.layers.6.self_attn.k_proj.weight': 1.5418418115586974e-06, 'model.decoder.layers.6.self_attn.k_proj.bias': 2.9045921223769255e-10, 'model.decoder.layers.6.self_attn.v_proj.weight': 0.0007602623663842678, 'model.decoder.layers.6.self_attn.v_proj.bias': 0.011044119484722614, 'model.decoder.layers.6.self_attn.q_proj.weight': 2.2584390535485e-07, 'model.decoder.layers.6.self_attn.q_proj.bias': 0.0008485889993607998, 'model.decoder.layers.6.self_attn.out_proj.weight': 3.578695759642869e-05, 'model.decoder.layers.6.self_attn.out_proj.bias': 0.008568225428462029, 'model.decoder.layers.6.self_attn_layer_norm.weight': 0.00713425362482667, 'model.decoder.layers.6.self_attn_layer_norm.bias': 0.014377156272530556, 'model.decoder.layers.6.fc1.weight': 4.1227933252230287e-05, 'model.decoder.layers.6.fc1.bias': 0.00033436791272833943, 'model.decoder.layers.6.fc2.weight': 6.13882457400905e-06, 'model.decoder.layers.6.fc2.bias': 0.018310433253645897, 'model.decoder.layers.6.final_layer_norm.weight': 0.0007947176927700639, 
'model.decoder.layers.6.final_layer_norm.bias': 0.004018186591565609, 'model.decoder.layers.7.self_attn.k_proj.weight': 1.8655551684787497e-05, 'model.decoder.layers.7.self_attn.k_proj.bias': 4.4433434709389985e-10, 'model.decoder.layers.7.self_attn.v_proj.weight': 0.0004696718533523381, 'model.decoder.layers.7.self_attn.v_proj.bias': 0.010697084479033947, 'model.decoder.layers.7.self_attn.q_proj.weight': 1.0651410775608383e-05, 'model.decoder.layers.7.self_attn.q_proj.bias': 0.0009541820036247373, 'model.decoder.layers.7.self_attn.out_proj.weight': 2.0471183233894408e-05, 'model.decoder.layers.7.self_attn.out_proj.bias': 0.04106755927205086, 'model.decoder.layers.7.self_attn_layer_norm.weight': 0.003637113608419895, 'model.decoder.layers.7.self_attn_layer_norm.bias': 0.003511386923491955, 'model.decoder.layers.7.fc1.weight': 3.541999467415735e-05, 'model.decoder.layers.7.fc1.bias': 0.00017271609976887703, 'model.decoder.layers.7.fc2.weight': 6.586087692994624e-07, 'model.decoder.layers.7.fc2.bias': 0.0007914276793599129, 'model.decoder.layers.7.final_layer_norm.weight': 0.0016061868518590927, 'model.decoder.layers.7.final_layer_norm.bias': 0.0006164757069200277, 'model.decoder.layers.8.self_attn.k_proj.weight': 1.312786935159238e-05, 'model.decoder.layers.8.self_attn.k_proj.bias': 4.789022511886287e-10, 'model.decoder.layers.8.self_attn.v_proj.weight': 0.00083456642460078, 'model.decoder.layers.8.self_attn.v_proj.bias': 0.0013190273893997073, 'model.decoder.layers.8.self_attn.q_proj.weight': 4.527746204985306e-05, 'model.decoder.layers.8.self_attn.q_proj.bias': 0.0003894694964401424, 'model.decoder.layers.8.self_attn.out_proj.weight': 8.155950126820244e-06, 'model.decoder.layers.8.self_attn.out_proj.bias': 0.031470075249671936, 'model.decoder.layers.8.self_attn_layer_norm.weight': 0.0021222857758402824, 'model.decoder.layers.8.self_attn_layer_norm.bias': 0.0019304797751829028, 'model.decoder.layers.8.fc1.weight': 2.3366730601992458e-05, 
'model.decoder.layers.8.fc1.bias': 0.0016706563765183091, 'model.decoder.layers.8.fc2.weight': 1.011752010526834e-05, 'model.decoder.layers.8.fc2.bias': 0.015386030077934265, 'model.decoder.layers.8.final_layer_norm.weight': 0.004658102057874203, 'model.decoder.layers.8.final_layer_norm.bias': 0.006879071705043316, 'model.decoder.layers.9.self_attn.k_proj.weight': 1.7018075595842674e-05, 'model.decoder.layers.9.self_attn.k_proj.bias': 7.391953715796262e-11, 'model.decoder.layers.9.self_attn.v_proj.weight': 0.0006141822086647153, 'model.decoder.layers.9.self_attn.v_proj.bias': 0.004593817517161369, 'model.decoder.layers.9.self_attn.q_proj.weight': 2.1341147657949477e-05, 'model.decoder.layers.9.self_attn.q_proj.bias': 0.0019551655277609825, 'model.decoder.layers.9.self_attn.out_proj.weight': 5.762240107287653e-05, 'model.decoder.layers.9.self_attn.out_proj.bias': 0.008541066199541092, 'model.decoder.layers.9.self_attn_layer_norm.weight': 0.00260370085015893, 'model.decoder.layers.9.self_attn_layer_norm.bias': 0.002429370302706957, 'model.decoder.layers.9.fc1.weight': 1.5558865925413556e-05, 'model.decoder.layers.9.fc1.bias': 0.0016150367446243763, 'model.decoder.layers.9.fc2.weight': 1.7371028661727905e-05, 'model.decoder.layers.9.fc2.bias': 0.0010262508876621723, 'model.decoder.layers.9.final_layer_norm.weight': 0.0006154445582069457, 'model.decoder.layers.9.final_layer_norm.bias': 0.004642384126782417, 'model.decoder.layers.10.self_attn.k_proj.weight': 1.7195801774505526e-05, 'model.decoder.layers.10.self_attn.k_proj.bias': 7.461231632532872e-11, 'model.decoder.layers.10.self_attn.v_proj.weight': 0.0006180479540489614, 'model.decoder.layers.10.self_attn.v_proj.bias': 0.0049537960439920425, 'model.decoder.layers.10.self_attn.q_proj.weight': 1.5819550753803924e-06, 'model.decoder.layers.10.self_attn.q_proj.bias': 0.0015959527809172869, 'model.decoder.layers.10.self_attn.out_proj.weight': 7.227572496049106e-05, 'model.decoder.layers.10.self_attn.out_proj.bias': 
0.0029820981435477734, 'model.decoder.layers.10.self_attn_layer_norm.weight': 0.005492689553648233, 'model.decoder.layers.10.self_attn_layer_norm.bias': 0.0049917567521333694, 'model.decoder.layers.10.fc1.weight': 1.7441607269574888e-06, 'model.decoder.layers.10.fc1.bias': 0.000596581376157701, 'model.decoder.layers.10.fc2.weight': 8.243401680374518e-06, 'model.decoder.layers.10.fc2.bias': 0.0008291625417768955, 'model.decoder.layers.10.final_layer_norm.weight': 0.0004895663587376475, 'model.decoder.layers.10.final_layer_norm.bias': 0.005814571864902973, 'model.decoder.layers.11.self_attn.k_proj.weight': 1.6726547983125784e-05, 'model.decoder.layers.11.self_attn.k_proj.bias': 7.352944919603033e-10, 'model.decoder.layers.11.self_attn.v_proj.weight': 0.0005792105803266168, 'model.decoder.layers.11.self_attn.v_proj.bias': 0.0014454503543674946, 'model.decoder.layers.11.self_attn.q_proj.weight': 1.984708069358021e-05, 'model.decoder.layers.11.self_attn.q_proj.bias': 0.0017196566332131624, 'model.decoder.layers.11.self_attn.out_proj.weight': 7.973126048455015e-05, 'model.decoder.layers.11.self_attn.out_proj.bias': 0.0002526906318962574, 'model.decoder.layers.11.self_attn_layer_norm.weight': 0.0006384402513504028, 'model.decoder.layers.11.self_attn_layer_norm.bias': 0.014260699972510338, 'model.decoder.layers.11.fc1.weight': 3.445312177063897e-05, 'model.decoder.layers.11.fc1.bias': 0.00018588214879855514, 'model.decoder.layers.11.fc2.weight': 2.20234619519033e-06, 'model.decoder.layers.11.fc2.bias': 0.023138443008065224, 'model.decoder.layers.11.final_layer_norm.weight': 0.0038702962920069695, 'model.decoder.layers.11.final_layer_norm.bias': 0.0020405559334903955, 'model.decoder.layers.12.self_attn.k_proj.weight': 5.6471722018613946e-06, 'model.decoder.layers.12.self_attn.k_proj.bias': 5.683409298740116e-10, 'model.decoder.layers.12.self_attn.v_proj.weight': 0.0003623973170761019, 'model.decoder.layers.12.self_attn.v_proj.bias': 0.004810386803001165, 
'model.decoder.layers.12.self_attn.q_proj.weight': 2.4387672965531237e-05, 'model.decoder.layers.12.self_attn.q_proj.bias': 0.0006049377843737602, 'model.decoder.layers.12.self_attn.out_proj.weight': 2.0980100089218467e-05, 'model.decoder.layers.12.self_attn.out_proj.bias': 0.0037242495454847813, 'model.decoder.layers.12.self_attn_layer_norm.weight': 0.0002736416645348072, 'model.decoder.layers.12.self_attn_layer_norm.bias': 0.0108397351577878, 'model.decoder.layers.12.fc1.weight': 4.7448615077883005e-05, 'model.decoder.layers.12.fc1.bias': 0.0008975025848485529, 'model.decoder.layers.12.fc2.weight': 2.871520700864494e-05, 'model.decoder.layers.12.fc2.bias': 0.011479119770228863, 'model.decoder.layers.12.final_layer_norm.weight': 0.0033557522110641003, 'model.decoder.layers.12.final_layer_norm.bias': 0.004673722665756941, 'model.decoder.layers.13.self_attn.k_proj.weight': 0.00011653665569610894, 'model.decoder.layers.13.self_attn.k_proj.bias': 1.7158043874587747e-09, 'model.decoder.layers.13.self_attn.v_proj.weight': 0.0002871536125894636, 'model.decoder.layers.13.self_attn.v_proj.bias': 0.0035168619360774755, 'model.decoder.layers.13.self_attn.q_proj.weight': 4.61002164229285e-05, 'model.decoder.layers.13.self_attn.q_proj.bias': 0.0004209924372844398, 'model.decoder.layers.13.self_attn.out_proj.weight': 4.131090099690482e-05, 'model.decoder.layers.13.self_attn.out_proj.bias': 0.00010073347948491573, 'model.decoder.layers.13.self_attn_layer_norm.weight': 0.003654373809695244, 'model.decoder.layers.13.self_attn_layer_norm.bias': 0.005862885154783726, 'model.decoder.layers.13.fc1.weight': 4.259213892510161e-05, 'model.decoder.layers.13.fc1.bias': 0.00028196259518153965, 'model.decoder.layers.13.fc2.weight': 1.934450892804307e-06, 'model.decoder.layers.13.fc2.bias': 0.006977382116019726, 'model.decoder.layers.13.final_layer_norm.weight': 0.004070748575031757, 'model.decoder.layers.13.final_layer_norm.bias': 0.0012171454727649689, 
'model.decoder.layers.14.self_attn.k_proj.weight': 2.5059925974346697e-05, 'model.decoder.layers.14.self_attn.k_proj.bias': 7.952873914973679e-10, 'model.decoder.layers.14.self_attn.v_proj.weight': 9.57314478000626e-05, 'model.decoder.layers.14.self_attn.v_proj.bias': 0.009776213206350803, 'model.decoder.layers.14.self_attn.q_proj.weight': 9.453647180635016e-06, 'model.decoder.layers.14.self_attn.q_proj.bias': 3.741658292710781e-05, 'model.decoder.layers.14.self_attn.out_proj.weight': 4.4570078898686916e-05, 'model.decoder.layers.14.self_attn.out_proj.bias': 0.004945332184433937, 'model.decoder.layers.14.self_attn_layer_norm.weight': 0.0027952108066529036, 'model.decoder.layers.14.self_attn_layer_norm.bias': 0.0017658316064625978, 'model.decoder.layers.14.fc1.weight': 2.2840002202428877e-05, 'model.decoder.layers.14.fc1.bias': 0.00041145997238345444, 'model.decoder.layers.14.fc2.weight': 9.089013474294916e-06, 'model.decoder.layers.14.fc2.bias': 0.0013153913896530867, 'model.decoder.layers.14.final_layer_norm.weight': 0.00015770667232573032, 'model.decoder.layers.14.final_layer_norm.bias': 0.0003043359611183405, 'model.decoder.layers.15.self_attn.k_proj.weight': 1.691165925876703e-05, 'model.decoder.layers.15.self_attn.k_proj.bias': 1.7975807509174047e-09, 'model.decoder.layers.15.self_attn.v_proj.weight': 6.160035263746977e-05, 'model.decoder.layers.15.self_attn.v_proj.bias': 0.004183325916528702, 'model.decoder.layers.15.self_attn.q_proj.weight': 9.07519097381737e-06, 'model.decoder.layers.15.self_attn.q_proj.bias': 0.001580888987518847, 'model.decoder.layers.15.self_attn.out_proj.weight': 4.506875484366901e-06, 'model.decoder.layers.15.self_attn.out_proj.bias': 0.00036701885983347893, 'model.decoder.layers.15.self_attn_layer_norm.weight': 0.0014215430710464716, 'model.decoder.layers.15.self_attn_layer_norm.bias': 0.000428510713391006, 'model.decoder.layers.15.fc1.weight': 8.200007869163528e-06, 'model.decoder.layers.15.fc1.bias': 0.0004673587391152978, 
'model.decoder.layers.15.fc2.weight': 9.591492016625125e-06, 'model.decoder.layers.15.fc2.bias': 0.008664260618388653, 'model.decoder.layers.15.final_layer_norm.weight': 0.00045877337106503546, 'model.decoder.layers.15.final_layer_norm.bias': 3.923231270164251e-05, 'model.decoder.layers.16.self_attn.k_proj.weight': 8.037301995500457e-06, 'model.decoder.layers.16.self_attn.k_proj.bias': 3.4562219752842793e-10, 'model.decoder.layers.16.self_attn.v_proj.weight': 0.0003743779961951077, 'model.decoder.layers.16.self_attn.v_proj.bias': 0.005785172805190086, 'model.decoder.layers.16.self_attn.q_proj.weight': 2.8230115276528522e-05, 'model.decoder.layers.16.self_attn.q_proj.bias': 0.00019174485350959003, 'model.decoder.layers.16.self_attn.out_proj.weight': 1.0935635145870037e-05, 'model.decoder.layers.16.self_attn.out_proj.bias': 0.003971535246819258, 'model.decoder.layers.16.self_attn_layer_norm.weight': 0.00021049194037914276, 'model.decoder.layers.16.self_attn_layer_norm.bias': 0.0005579772405326366, 'model.decoder.layers.16.fc1.weight': 1.307657021243358e-05, 'model.decoder.layers.16.fc1.bias': 0.00021872477373108268, 'model.decoder.layers.16.fc2.weight': 9.013900125864893e-06, 'model.decoder.layers.16.fc2.bias': 0.006086578592658043, 'model.decoder.layers.16.final_layer_norm.weight': 0.000611298019066453, 'model.decoder.layers.16.final_layer_norm.bias': 0.0002765201497823, 'model.decoder.layers.17.self_attn.k_proj.weight': 2.1650248527294025e-05, 'model.decoder.layers.17.self_attn.k_proj.bias': 1.0046452558754027e-10, 'model.decoder.layers.17.self_attn.v_proj.weight': 0.00011780836939578876, 'model.decoder.layers.17.self_attn.v_proj.bias': 0.009902569465339184, 'model.decoder.layers.17.self_attn.q_proj.weight': 9.811848940444179e-06, 'model.decoder.layers.17.self_attn.q_proj.bias': 0.0010778481373563409, 'model.decoder.layers.17.self_attn.out_proj.weight': 3.1169627618510276e-05, 'model.decoder.layers.17.self_attn.out_proj.bias': 0.0014936437364667654, 
'model.decoder.layers.17.self_attn_layer_norm.weight': 0.0011217626743018627, 'model.decoder.layers.17.self_attn_layer_norm.bias': 0.005995844025164843, 'model.decoder.layers.17.fc1.weight': 1.495193373557413e-05, 'model.decoder.layers.17.fc1.bias': 8.811787120066583e-05, 'model.decoder.layers.17.fc2.weight': 5.135842684467207e-07, 'model.decoder.layers.17.fc2.bias': 0.004266152158379555, 'model.decoder.layers.17.final_layer_norm.weight': 0.0015217175241559744, 'model.decoder.layers.17.final_layer_norm.bias': 0.0006807037862017751, 'model.decoder.layers.18.self_attn.k_proj.weight': 8.06709340395173e-06, 'model.decoder.layers.18.self_attn.k_proj.bias': 2.382538610845586e-10, 'model.decoder.layers.18.self_attn.v_proj.weight': 0.00010729266796261072, 'model.decoder.layers.18.self_attn.v_proj.bias': 0.0002375934855081141, 'model.decoder.layers.18.self_attn.q_proj.weight': 1.3824549569108058e-05, 'model.decoder.layers.18.self_attn.q_proj.bias': 0.0008267344674095511, 'model.decoder.layers.18.self_attn.out_proj.weight': 4.728955536847934e-05, 'model.decoder.layers.18.self_attn.out_proj.bias': 0.00023239030269905925, 'model.decoder.layers.18.self_attn_layer_norm.weight': 0.0017888545989990234, 'model.decoder.layers.18.self_attn_layer_norm.bias': 0.0026490602176636457, 'model.decoder.layers.18.fc1.weight': 2.582679189799819e-05, 'model.decoder.layers.18.fc1.bias': 1.660519774304703e-05, 'model.decoder.layers.18.fc2.weight': 7.1948597906157374e-06, 'model.decoder.layers.18.fc2.bias': 0.0015473762759938836, 'model.decoder.layers.18.final_layer_norm.weight': 9.034632239490747e-05, 'model.decoder.layers.18.final_layer_norm.bias': 0.0019304485758766532, 'model.decoder.layers.19.self_attn.k_proj.weight': 2.0197403500787914e-05, 'model.decoder.layers.19.self_attn.k_proj.bias': 1.303785968076454e-10, 'model.decoder.layers.19.self_attn.v_proj.weight': 0.0002107986219925806, 'model.decoder.layers.19.self_attn.v_proj.bias': 0.0008111624629236758, 
'model.decoder.layers.19.self_attn.q_proj.weight': 1.4097247913014144e-05, 'model.decoder.layers.19.self_attn.q_proj.bias': 4.587244620779529e-05, 'model.decoder.layers.19.self_attn.out_proj.weight': 3.4978158510057256e-05, 'model.decoder.layers.19.self_attn.out_proj.bias': 0.0005899042589589953, 'model.decoder.layers.19.self_attn_layer_norm.weight': 0.0013289607595652342, 'model.decoder.layers.19.self_attn_layer_norm.bias': 0.0023845075629651546, 'model.decoder.layers.19.fc1.weight': 1.2554923159768805e-05, 'model.decoder.layers.19.fc1.bias': 0.0001672703365329653, 'model.decoder.layers.19.fc2.weight': 6.046268026693724e-06, 'model.decoder.layers.19.fc2.bias': 0.002383989281952381, 'model.decoder.layers.19.final_layer_norm.weight': 0.0005191607633605599, 'model.decoder.layers.19.final_layer_norm.bias': 0.0011610982473939657, 'model.decoder.layers.20.self_attn.k_proj.weight': 2.1556634237640537e-05, 'model.decoder.layers.20.self_attn.k_proj.bias': 1.950688499618991e-10, 'model.decoder.layers.20.self_attn.v_proj.weight': 6.871994264656678e-05, 'model.decoder.layers.20.self_attn.v_proj.bias': 0.0016091763973236084, 'model.decoder.layers.20.self_attn.q_proj.weight': 1.1541615094756708e-05, 'model.decoder.layers.20.self_attn.q_proj.bias': 0.0002420599339529872, 'model.decoder.layers.20.self_attn.out_proj.weight': 6.485556696134154e-06, 'model.decoder.layers.20.self_attn.out_proj.bias': 0.0017362730577588081, 'model.decoder.layers.20.self_attn_layer_norm.weight': 0.000966782565228641, 'model.decoder.layers.20.self_attn_layer_norm.bias': 0.0024521793238818645, 'model.decoder.layers.20.fc1.weight': 2.220020178356208e-06, 'model.decoder.layers.20.fc1.bias': 0.00023903901455923915, 'model.decoder.layers.20.fc2.weight': 5.11652751811198e-06, 'model.decoder.layers.20.fc2.bias': 0.001610452076420188, 'model.decoder.layers.20.final_layer_norm.weight': 0.0006391120259650052, 'model.decoder.layers.20.final_layer_norm.bias': 0.00036247906973585486, 
'model.decoder.layers.21.self_attn.k_proj.weight': 2.875465725082904e-06, 'model.decoder.layers.21.self_attn.k_proj.bias': 1.0963829844001793e-10, 'model.decoder.layers.21.self_attn.v_proj.weight': 0.0001076810949598439, 'model.decoder.layers.21.self_attn.v_proj.bias': 0.0012785817962139845, 'model.decoder.layers.21.self_attn.q_proj.weight': 5.267726010060869e-06, 'model.decoder.layers.21.self_attn.q_proj.bias': 0.0005636128480546176, 'model.decoder.layers.21.self_attn.out_proj.weight': 1.0467017091286834e-05, 'model.decoder.layers.21.self_attn.out_proj.bias': 0.0006349491886794567, 'model.decoder.layers.21.self_attn_layer_norm.weight': 9.788910392671824e-05, 'model.decoder.layers.21.self_attn_layer_norm.bias': 0.0018109140219166875, 'model.decoder.layers.21.fc1.weight': 1.0524420758883934e-05, 'model.decoder.layers.21.fc1.bias': 0.0001395009458065033, 'model.decoder.layers.21.fc2.weight': 7.825683496776037e-07, 'model.decoder.layers.21.fc2.bias': 0.00027287568082101643, 'model.decoder.layers.21.final_layer_norm.weight': 0.002189337508752942, 'model.decoder.layers.21.final_layer_norm.bias': 0.0003936631546821445, 'model.decoder.layers.22.self_attn.k_proj.weight': 7.157395884860307e-06, 'model.decoder.layers.22.self_attn.k_proj.bias': 1.2284405714879654e-09, 'model.decoder.layers.22.self_attn.v_proj.weight': 0.0001079331268556416, 'model.decoder.layers.22.self_attn.v_proj.bias': 0.0024641603231430054, 'model.decoder.layers.22.self_attn.q_proj.weight': 2.9348946554819122e-05, 'model.decoder.layers.22.self_attn.q_proj.bias': 0.00046467551146633923, 'model.decoder.layers.22.self_attn.out_proj.weight': 1.1630811059148982e-05, 'model.decoder.layers.22.self_attn.out_proj.bias': 1.3318858691491187e-05, 'model.decoder.layers.22.self_attn_layer_norm.weight': 0.0024139797315001488, 'model.decoder.layers.22.self_attn_layer_norm.bias': 0.0008520184201188385, 'model.decoder.layers.22.fc1.weight': 1.299438372370787e-05, 'model.decoder.layers.22.fc1.bias': 6.156826566439122e-05, 
'model.decoder.layers.22.fc2.weight': 6.837515229562996e-06, 'model.decoder.layers.22.fc2.bias': 0.000750359205994755, 'model.decoder.layers.22.final_layer_norm.weight': 0.0005745317903347313, 'model.decoder.layers.22.final_layer_norm.bias': 0.00038529568701051176, 'model.decoder.layers.23.self_attn.k_proj.weight': 0.0008154936949722469, 'model.decoder.layers.23.self_attn.k_proj.bias': 1.5340060599555727e-07, 'model.decoder.layers.23.self_attn.v_proj.weight': 1.653862273087725e-05, 'model.decoder.layers.23.self_attn.v_proj.bias': 0.0008790317224338651, 'model.decoder.layers.23.self_attn.q_proj.weight': 9.975660759664606e-06, 'model.decoder.layers.23.self_attn.q_proj.bias': 0.0004680745187215507, 'model.decoder.layers.23.self_attn.out_proj.weight': 2.2089188860263675e-06, 'model.decoder.layers.23.self_attn.out_proj.bias': 0.00054021121468395, 'model.decoder.layers.23.self_attn_layer_norm.weight': 0.05621916800737381, 'model.decoder.layers.23.self_attn_layer_norm.bias': 0.0024355368223041296, 'model.decoder.layers.23.fc1.weight': 1.85953795153182e-05, 'model.decoder.layers.23.fc1.bias': 9.960689203580841e-05, 'model.decoder.layers.23.fc2.weight': 1.5280495517799864e-06, 'model.decoder.layers.23.fc2.bias': 0.0008676279685460031, 'model.decoder.layers.23.final_layer_norm.weight': 0.0021101871971040964, 'model.decoder.layers.23.final_layer_norm.bias': 0.0014080661348998547}\n",
    "hessian_trace = {'model.decoder.embed_tokens.weight': 1194.9275919596355, 'model.decoder.embed_positions.weight': 62.206461588541664, 'model.decoder.final_layer_norm.weight': 9.23440663019816, 'model.decoder.final_layer_norm.bias': 3.905731995900472, 'model.decoder.layers.0.self_attn.k_proj.weight': 80.81728871663411, 'model.decoder.layers.0.self_attn.k_proj.bias': 4.4644428953688475e-07, 'model.decoder.layers.0.self_attn.v_proj.weight': 199.46858723958334, 'model.decoder.layers.0.self_attn.v_proj.bias': 16.865546941757202, 'model.decoder.layers.0.self_attn.q_proj.weight': 122.04016367594402, 'model.decoder.layers.0.self_attn.q_proj.bias': 1.4802623589833577, 'model.decoder.layers.0.self_attn.out_proj.weight': 332.82420857747394, 'model.decoder.layers.0.self_attn.out_proj.bias': 86.57215690612793, 'model.decoder.layers.0.self_attn_layer_norm.weight': 1.176854819059372, 'model.decoder.layers.0.self_attn_layer_norm.bias': 3.8826220830281577, 'model.decoder.layers.0.fc1.weight': 1026.0242919921875, 'model.decoder.layers.0.fc1.bias': 5.837026913960774, 'model.decoder.layers.0.fc2.weight': 434.50232950846356, 'model.decoder.layers.0.fc2.bias': 10.715811093648275, 'model.decoder.layers.0.final_layer_norm.weight': 1.1563409169514973, 'model.decoder.layers.0.final_layer_norm.bias': 0.8455956776936849, 'model.decoder.layers.1.self_attn.k_proj.weight': 9.219987392425537, 'model.decoder.layers.1.self_attn.k_proj.bias': 1.667769045828512e-07, 'model.decoder.layers.1.self_attn.v_proj.weight': 1181.0137125651042, 'model.decoder.layers.1.self_attn.v_proj.bias': 7.343598206837972, 'model.decoder.layers.1.self_attn.q_proj.weight': 13.207474072774252, 'model.decoder.layers.1.self_attn.q_proj.bias': 1.038723607858022, 'model.decoder.layers.1.self_attn.out_proj.weight': 23.346385955810547, 'model.decoder.layers.1.self_attn.out_proj.bias': 25.51119613647461, 'model.decoder.layers.1.self_attn_layer_norm.weight': 1.7841885884602864, 
'model.decoder.layers.1.self_attn_layer_norm.bias': 0.32044434547424316, 'model.decoder.layers.1.fc1.weight': 496.04359944661456, 'model.decoder.layers.1.fc1.bias': 4.226608117421468, 'model.decoder.layers.1.fc2.weight': 82.67283121744792, 'model.decoder.layers.1.fc2.bias': 4.699393590291341, 'model.decoder.layers.1.final_layer_norm.weight': 0.5279660224914551, 'model.decoder.layers.1.final_layer_norm.bias': 2.80998166402181, 'model.decoder.layers.2.self_attn.k_proj.weight': 50.17959785461426, 'model.decoder.layers.2.self_attn.k_proj.bias': 8.968943348008906e-07, 'model.decoder.layers.2.self_attn.v_proj.weight': 406.1774190266927, 'model.decoder.layers.2.self_attn.v_proj.bias': 34.865872065226235, 'model.decoder.layers.2.self_attn.q_proj.weight': 73.74266052246094, 'model.decoder.layers.2.self_attn.q_proj.bias': 0.4287129243214925, 'model.decoder.layers.2.self_attn.out_proj.weight': 159.37913386027017, 'model.decoder.layers.2.self_attn.out_proj.bias': 6.17769734064738, 'model.decoder.layers.2.self_attn_layer_norm.weight': 3.1265578269958496, 'model.decoder.layers.2.self_attn_layer_norm.bias': 1.1245239575703938, 'model.decoder.layers.2.fc1.weight': 1249.8432515462239, 'model.decoder.layers.2.fc1.bias': 17.075202624003094, 'model.decoder.layers.2.fc2.weight': 95.0023930867513, 'model.decoder.layers.2.fc2.bias': 8.266383488972982, 'model.decoder.layers.2.final_layer_norm.weight': 0.3421815236409505, 'model.decoder.layers.2.final_layer_norm.bias': 4.646793524424235, 'model.decoder.layers.3.self_attn.k_proj.weight': 31.99148464202881, 'model.decoder.layers.3.self_attn.k_proj.bias': 5.800834514957387e-07, 'model.decoder.layers.3.self_attn.v_proj.weight': 1177.5484822591145, 'model.decoder.layers.3.self_attn.v_proj.bias': 12.81634553273519, 'model.decoder.layers.3.self_attn.q_proj.weight': 159.7560297648112, 'model.decoder.layers.3.self_attn.q_proj.bias': 1.7579676707585652, 'model.decoder.layers.3.self_attn.out_proj.weight': 30.28198496500651, 
'model.decoder.layers.3.self_attn.out_proj.bias': 4.435482978820801, 'model.decoder.layers.3.self_attn_layer_norm.weight': 13.2084059715271, 'model.decoder.layers.3.self_attn_layer_norm.bias': 4.749482790629069, 'model.decoder.layers.3.fc1.weight': 217.31351216634116, 'model.decoder.layers.3.fc1.bias': 1.0110915501912434, 'model.decoder.layers.3.fc2.weight': 26.99854024251302, 'model.decoder.layers.3.fc2.bias': 31.778201421101887, 'model.decoder.layers.3.final_layer_norm.weight': 21.061067899068195, 'model.decoder.layers.3.final_layer_norm.bias': 2.965498447418213, 'model.decoder.layers.4.self_attn.k_proj.weight': 80.69611740112305, 'model.decoder.layers.4.self_attn.k_proj.bias': 1.0404449615937967e-06, 'model.decoder.layers.4.self_attn.v_proj.weight': 1757.7148030598958, 'model.decoder.layers.4.self_attn.v_proj.bias': 12.15992546081543, 'model.decoder.layers.4.self_attn.q_proj.weight': 120.78164800008138, 'model.decoder.layers.4.self_attn.q_proj.bias': 0.05103441079457601, 'model.decoder.layers.4.self_attn.out_proj.weight': 81.84792073567708, 'model.decoder.layers.4.self_attn.out_proj.bias': 5.4139251708984375, 'model.decoder.layers.4.self_attn_layer_norm.weight': 5.0145918528238935, 'model.decoder.layers.4.self_attn_layer_norm.bias': 6.521610577901204, 'model.decoder.layers.4.fc1.weight': 588.6539815266927, 'model.decoder.layers.4.fc1.bias': 7.932360013326009, 'model.decoder.layers.4.fc2.weight': 41.35539881388346, 'model.decoder.layers.4.fc2.bias': 26.083303451538086, 'model.decoder.layers.4.final_layer_norm.weight': 1.0110756556193035, 'model.decoder.layers.4.final_layer_norm.bias': 10.816126108169556, 'model.decoder.layers.5.self_attn.k_proj.weight': 23.25175412495931, 'model.decoder.layers.5.self_attn.k_proj.bias': 4.583553163683973e-07, 'model.decoder.layers.5.self_attn.v_proj.weight': 977.9918212890625, 'model.decoder.layers.5.self_attn.v_proj.bias': 19.987658818562824, 'model.decoder.layers.5.self_attn.q_proj.weight': 117.74191029866536, 
'model.decoder.layers.5.self_attn.q_proj.bias': 1.2908051510651906, 'model.decoder.layers.5.self_attn.out_proj.weight': 72.65645345052083, 'model.decoder.layers.5.self_attn.out_proj.bias': 23.379069010416668, 'model.decoder.layers.5.self_attn_layer_norm.weight': 3.5170437494913735, 'model.decoder.layers.5.self_attn_layer_norm.bias': 16.186225255330402, 'model.decoder.layers.5.fc1.weight': 11.558486938476562, 'model.decoder.layers.5.fc1.bias': 3.054628372192383, 'model.decoder.layers.5.fc2.weight': 104.43958695729573, 'model.decoder.layers.5.fc2.bias': 39.510135650634766, 'model.decoder.layers.5.final_layer_norm.weight': 0.47280871868133545, 'model.decoder.layers.5.final_layer_norm.bias': 6.2945131460825605, 'model.decoder.layers.6.self_attn.k_proj.weight': 9.65622329711914, 'model.decoder.layers.6.self_attn.k_proj.bias': 1.539022681148102e-07, 'model.decoder.layers.6.self_attn.v_proj.weight': 3969.3793131510415, 'model.decoder.layers.6.self_attn.v_proj.bias': 0.15558862686157227, 'model.decoder.layers.6.self_attn.q_proj.weight': 33.485870361328125, 'model.decoder.layers.6.self_attn.q_proj.bias': 1.1183207035064697, 'model.decoder.layers.6.self_attn.out_proj.weight': 211.32996877034506, 'model.decoder.layers.6.self_attn.out_proj.bias': 32.47879123687744, 'model.decoder.layers.6.self_attn_layer_norm.weight': 7.60649045308431, 'model.decoder.layers.6.self_attn_layer_norm.bias': 2.608099619547526, 'model.decoder.layers.6.fc1.weight': 295.0924479166667, 'model.decoder.layers.6.fc1.bias': 0.5748985211054484, 'model.decoder.layers.6.fc2.weight': 130.67133585611978, 'model.decoder.layers.6.fc2.bias': 57.66433970133463, 'model.decoder.layers.6.final_layer_norm.weight': 1.0882696310679119, 'model.decoder.layers.6.final_layer_norm.bias': 1.008333683013916, 'model.decoder.layers.7.self_attn.k_proj.weight': 11.613513310750326, 'model.decoder.layers.7.self_attn.k_proj.bias': 5.06440301251132e-07, 'model.decoder.layers.7.self_attn.v_proj.weight': 3297.1876627604165, 
'model.decoder.layers.7.self_attn.v_proj.bias': 12.69955571492513, 'model.decoder.layers.7.self_attn.q_proj.weight': 112.87678782145183, 'model.decoder.layers.7.self_attn.q_proj.bias': 0.9876437584559122, 'model.decoder.layers.7.self_attn.out_proj.weight': 218.1019083658854, 'model.decoder.layers.7.self_attn.out_proj.bias': 42.892110188802086, 'model.decoder.layers.7.self_attn_layer_norm.weight': 15.2481476465861, 'model.decoder.layers.7.self_attn_layer_norm.bias': 1.5999202728271484, 'model.decoder.layers.7.fc1.weight': 589.8232116699219, 'model.decoder.layers.7.fc1.bias': 2.181494394938151, 'model.decoder.layers.7.fc2.weight': 126.20874532063802, 'model.decoder.layers.7.fc2.bias': 13.879932085673014, 'model.decoder.layers.7.final_layer_norm.weight': 2.940955638885498, 'model.decoder.layers.7.final_layer_norm.bias': 0.084258238474528, 'model.decoder.layers.8.self_attn.k_proj.weight': 100.79911295572917, 'model.decoder.layers.8.self_attn.k_proj.bias': 6.637437763856724e-07, 'model.decoder.layers.8.self_attn.v_proj.weight': 3215.857666015625, 'model.decoder.layers.8.self_attn.v_proj.bias': 10.213244756062826, 'model.decoder.layers.8.self_attn.q_proj.weight': 0.3197813034057617, 'model.decoder.layers.8.self_attn.q_proj.bias': 1.476491649945577, 'model.decoder.layers.8.self_attn.out_proj.weight': 85.60840352376302, 'model.decoder.layers.8.self_attn.out_proj.bias': 29.13232167561849, 'model.decoder.layers.8.self_attn_layer_norm.weight': 14.836044152577719, 'model.decoder.layers.8.self_attn_layer_norm.bias': 8.231045087178549, 'model.decoder.layers.8.fc1.weight': 1029.8172200520833, 'model.decoder.layers.8.fc1.bias': 14.799010515213013, 'model.decoder.layers.8.fc2.weight': 140.59540557861328, 'model.decoder.layers.8.fc2.bias': 27.155534426371258, 'model.decoder.layers.8.final_layer_norm.weight': 3.5487560033798218, 'model.decoder.layers.8.final_layer_norm.bias': 5.694772958755493, 'model.decoder.layers.9.self_attn.k_proj.weight': 20.521867752075195, 
'model.decoder.layers.9.self_attn.k_proj.bias': 5.125757525092922e-07, 'model.decoder.layers.9.self_attn.v_proj.weight': 3243.8319498697915, 'model.decoder.layers.9.self_attn.v_proj.bias': 26.176570892333984, 'model.decoder.layers.9.self_attn.q_proj.weight': 19.8382085164388, 'model.decoder.layers.9.self_attn.q_proj.bias': 0.5916710992654165, 'model.decoder.layers.9.self_attn.out_proj.weight': 323.1460316975911, 'model.decoder.layers.9.self_attn.out_proj.bias': 28.43972905476888, 'model.decoder.layers.9.self_attn_layer_norm.weight': 4.2964372634887695, 'model.decoder.layers.9.self_attn_layer_norm.bias': 19.154499371846516, 'model.decoder.layers.9.fc1.weight': 325.1955108642578, 'model.decoder.layers.9.fc1.bias': 16.9618345896403, 'model.decoder.layers.9.fc2.weight': 36.228437741597496, 'model.decoder.layers.9.fc2.bias': 10.317483266194662, 'model.decoder.layers.9.final_layer_norm.weight': 5.6070036093393965, 'model.decoder.layers.9.final_layer_norm.bias': 4.4560166994730634, 'model.decoder.layers.10.self_attn.k_proj.weight': 40.140647888183594, 'model.decoder.layers.10.self_attn.k_proj.bias': 1.2212367437314242e-06, 'model.decoder.layers.10.self_attn.v_proj.weight': 3635.5814615885415, 'model.decoder.layers.10.self_attn.v_proj.bias': 4.02281379699707, 'model.decoder.layers.10.self_attn.q_proj.weight': 87.16917165120442, 'model.decoder.layers.10.self_attn.q_proj.bias': 0.6868895689646403, 'model.decoder.layers.10.self_attn.out_proj.weight': 40.987963358561196, 'model.decoder.layers.10.self_attn.out_proj.bias': 32.79821650187174, 'model.decoder.layers.10.self_attn_layer_norm.weight': 1.8764712015787761, 'model.decoder.layers.10.self_attn_layer_norm.bias': 9.303855578104654, 'model.decoder.layers.10.fc1.weight': 278.33945719401044, 'model.decoder.layers.10.fc1.bias': 2.5051854451497397, 'model.decoder.layers.10.fc2.weight': 83.3369852701823, 'model.decoder.layers.10.fc2.bias': 14.743237813313803, 'model.decoder.layers.10.final_layer_norm.weight': 3.912165323893229, 
'model.decoder.layers.10.final_layer_norm.bias': 2.3821582794189453, 'model.decoder.layers.11.self_attn.k_proj.weight': 133.46747334798178, 'model.decoder.layers.11.self_attn.k_proj.bias': 4.210044911208873e-07, 'model.decoder.layers.11.self_attn.v_proj.weight': 3056.7123209635415, 'model.decoder.layers.11.self_attn.v_proj.bias': 8.785794576009115, 'model.decoder.layers.11.self_attn.q_proj.weight': 238.0325164794922, 'model.decoder.layers.11.self_attn.q_proj.bias': 1.6419847806294758, 'model.decoder.layers.11.self_attn.out_proj.weight': 260.1416371663411, 'model.decoder.layers.11.self_attn.out_proj.bias': 25.442824681599934, 'model.decoder.layers.11.self_attn_layer_norm.weight': 4.364767074584961, 'model.decoder.layers.11.self_attn_layer_norm.bias': 3.1743253072102866, 'model.decoder.layers.11.fc1.weight': 611.6233622233073, 'model.decoder.layers.11.fc1.bias': 3.2861900329589844, 'model.decoder.layers.11.fc2.weight': 182.40909322102866, 'model.decoder.layers.11.fc2.bias': 12.629422346750895, 'model.decoder.layers.11.final_layer_norm.weight': 1.7137949069341023, 'model.decoder.layers.11.final_layer_norm.bias': 7.593117078145345, 'model.decoder.layers.12.self_attn.k_proj.weight': 114.80877431233723, 'model.decoder.layers.12.self_attn.k_proj.bias': 1.3701719581149518e-07, 'model.decoder.layers.12.self_attn.v_proj.weight': 1478.7087809244792, 'model.decoder.layers.12.self_attn.v_proj.bias': 14.836765925089518, 'model.decoder.layers.12.self_attn.q_proj.weight': 48.580858866373696, 'model.decoder.layers.12.self_attn.q_proj.bias': 1.3795514702796936, 'model.decoder.layers.12.self_attn.out_proj.weight': 204.7934595743815, 'model.decoder.layers.12.self_attn.out_proj.bias': 16.134981791178387, 'model.decoder.layers.12.self_attn_layer_norm.weight': 1.0600868860880535, 'model.decoder.layers.12.self_attn_layer_norm.bias': 4.9434904257456465, 'model.decoder.layers.12.fc1.weight': 399.1716817220052, 'model.decoder.layers.12.fc1.bias': 2.676555315653483, 
'model.decoder.layers.12.fc2.weight': 178.97567240397134, 'model.decoder.layers.12.fc2.bias': 8.133237838745117, 'model.decoder.layers.12.final_layer_norm.weight': 2.2795286178588867, 'model.decoder.layers.12.final_layer_norm.bias': 0.9722844759623209, 'model.decoder.layers.13.self_attn.k_proj.weight': 122.33266957600911, 'model.decoder.layers.13.self_attn.k_proj.bias': 5.167579123129448e-07, 'model.decoder.layers.13.self_attn.v_proj.weight': 2155.754638671875, 'model.decoder.layers.13.self_attn.v_proj.bias': 2.5755065282185874, 'model.decoder.layers.13.self_attn.q_proj.weight': 130.72259012858072, 'model.decoder.layers.13.self_attn.q_proj.bias': 0.0631372481584549, 'model.decoder.layers.13.self_attn.out_proj.weight': 232.62322998046875, 'model.decoder.layers.13.self_attn.out_proj.bias': 6.0946706136067705, 'model.decoder.layers.13.self_attn_layer_norm.weight': 1.5373609066009521, 'model.decoder.layers.13.self_attn_layer_norm.bias': 5.817979653676351, 'model.decoder.layers.13.fc1.weight': 235.78799057006836, 'model.decoder.layers.13.fc1.bias': 7.012344042460124, 'model.decoder.layers.13.fc2.weight': 88.86114565531413, 'model.decoder.layers.13.fc2.bias': 8.57457160949707, 'model.decoder.layers.13.final_layer_norm.weight': 3.601138432820638, 'model.decoder.layers.13.final_layer_norm.bias': 0.26936769485473633, 'model.decoder.layers.14.self_attn.k_proj.weight': 58.549051920572914, 'model.decoder.layers.14.self_attn.k_proj.bias': 1.050436064057673e-07, 'model.decoder.layers.14.self_attn.v_proj.weight': 1588.015116373698, 'model.decoder.layers.14.self_attn.v_proj.bias': 1.025929609934489, 'model.decoder.layers.14.self_attn.q_proj.weight': 195.23397318522134, 'model.decoder.layers.14.self_attn.q_proj.bias': 0.5062949657440186, 'model.decoder.layers.14.self_attn.out_proj.weight': 189.880859375, 'model.decoder.layers.14.self_attn.out_proj.bias': 24.7160218556722, 'model.decoder.layers.14.self_attn_layer_norm.weight': 0.4795061747233073, 
'model.decoder.layers.14.self_attn_layer_norm.bias': 7.3194325764973955, 'model.decoder.layers.14.fc1.weight': 237.11504618326822, 'model.decoder.layers.14.fc1.bias': 0.6288999716440836, 'model.decoder.layers.14.fc2.weight': 300.7984568277995, 'model.decoder.layers.14.fc2.bias': 12.473860263824463, 'model.decoder.layers.14.final_layer_norm.weight': 5.3096497853597, 'model.decoder.layers.14.final_layer_norm.bias': 3.44138240814209, 'model.decoder.layers.15.self_attn.k_proj.weight': 89.43447367350261, 'model.decoder.layers.15.self_attn.k_proj.bias': 2.0174735861170725e-06, 'model.decoder.layers.15.self_attn.v_proj.weight': 976.4321492513021, 'model.decoder.layers.15.self_attn.v_proj.bias': 7.8637111981709795, 'model.decoder.layers.15.self_attn.q_proj.weight': 40.05120849609375, 'model.decoder.layers.15.self_attn.q_proj.bias': 1.86562579870224, 'model.decoder.layers.15.self_attn.out_proj.weight': 91.47459411621094, 'model.decoder.layers.15.self_attn.out_proj.bias': 9.201977014541626, 'model.decoder.layers.15.self_attn_layer_norm.weight': 8.13798983891805, 'model.decoder.layers.15.self_attn_layer_norm.bias': 4.080739339192708, 'model.decoder.layers.15.fc1.weight': 357.3152160644531, 'model.decoder.layers.15.fc1.bias': 2.50886599222819, 'model.decoder.layers.15.fc2.weight': 75.35965347290039, 'model.decoder.layers.15.fc2.bias': 1.269438107808431, 'model.decoder.layers.15.final_layer_norm.weight': 0.22304606437683105, 'model.decoder.layers.15.final_layer_norm.bias': 2.5818015336990356, 'model.decoder.layers.16.self_attn.k_proj.weight': 76.5325101216634, 'model.decoder.layers.16.self_attn.k_proj.bias': 2.3469116664879644e-07, 'model.decoder.layers.16.self_attn.v_proj.weight': 1183.2902018229167, 'model.decoder.layers.16.self_attn.v_proj.bias': 2.2639951705932617, 'model.decoder.layers.16.self_attn.q_proj.weight': 10.976270039876303, 'model.decoder.layers.16.self_attn.q_proj.bias': 0.7257786691188812, 'model.decoder.layers.16.self_attn.out_proj.weight': 214.39805603027344, 
'model.decoder.layers.16.self_attn.out_proj.bias': 2.0170888900756836, 'model.decoder.layers.16.self_attn_layer_norm.weight': 3.77446182568868, 'model.decoder.layers.16.self_attn_layer_norm.bias': 6.8916622797648115, 'model.decoder.layers.16.fc1.weight': 312.0342610677083, 'model.decoder.layers.16.fc1.bias': 2.356794555981954, 'model.decoder.layers.16.fc2.weight': 173.55509440104166, 'model.decoder.layers.16.fc2.bias': 3.8546457290649414, 'model.decoder.layers.16.final_layer_norm.weight': 1.478039264678955, 'model.decoder.layers.16.final_layer_norm.bias': 2.4736698865890503, 'model.decoder.layers.17.self_attn.k_proj.weight': 74.0786501566569, 'model.decoder.layers.17.self_attn.k_proj.bias': 1.765959799134483e-07, 'model.decoder.layers.17.self_attn.v_proj.weight': 672.512196858724, 'model.decoder.layers.17.self_attn.v_proj.bias': 3.420607646306356, 'model.decoder.layers.17.self_attn.q_proj.weight': 135.22151947021484, 'model.decoder.layers.17.self_attn.q_proj.bias': 0.22574500242869058, 'model.decoder.layers.17.self_attn.out_proj.weight': 165.94742329915366, 'model.decoder.layers.17.self_attn.out_proj.bias': 6.086919943491618, 'model.decoder.layers.17.self_attn_layer_norm.weight': 1.4737507502237956, 'model.decoder.layers.17.self_attn_layer_norm.bias': 0.47539273897806805, 'model.decoder.layers.17.fc1.weight': 202.49539184570312, 'model.decoder.layers.17.fc1.bias': 2.116861661275228, 'model.decoder.layers.17.fc2.weight': 47.12660471598307, 'model.decoder.layers.17.fc2.bias': 5.959351936976115, 'model.decoder.layers.17.final_layer_norm.weight': 0.7635479966799418, 'model.decoder.layers.17.final_layer_norm.bias': 0.5586413145065308, 'model.decoder.layers.18.self_attn.k_proj.weight': 13.660836537679037, 'model.decoder.layers.18.self_attn.k_proj.bias': 7.450204672447095e-07, 'model.decoder.layers.18.self_attn.v_proj.weight': 583.4378763834635, 'model.decoder.layers.18.self_attn.v_proj.bias': 0.2318133513132731, 'model.decoder.layers.18.self_attn.q_proj.weight': 
56.271016438802086, 'model.decoder.layers.18.self_attn.q_proj.bias': 0.44491873184839886, 'model.decoder.layers.18.self_attn.out_proj.weight': 28.96947987874349, 'model.decoder.layers.18.self_attn.out_proj.bias': 1.8864336013793945, 'model.decoder.layers.18.self_attn_layer_norm.weight': 5.648584405581157, 'model.decoder.layers.18.self_attn_layer_norm.bias': 1.0241370995839436, 'model.decoder.layers.18.fc1.weight': 82.8545633951823, 'model.decoder.layers.18.fc1.bias': 1.1417232354482014, 'model.decoder.layers.18.fc2.weight': 67.36198616027832, 'model.decoder.layers.18.fc2.bias': 0.23440690835316977, 'model.decoder.layers.18.final_layer_norm.weight': 0.851794163386027, 'model.decoder.layers.18.final_layer_norm.bias': 0.3095352252324422, 'model.decoder.layers.19.self_attn.k_proj.weight': 38.4573860168457, 'model.decoder.layers.19.self_attn.k_proj.bias': 5.608417268376797e-07, 'model.decoder.layers.19.self_attn.v_proj.weight': 577.9170532226562, 'model.decoder.layers.19.self_attn.v_proj.bias': 2.1673796971639, 'model.decoder.layers.19.self_attn.q_proj.weight': 41.061418533325195, 'model.decoder.layers.19.self_attn.q_proj.bias': 0.3846500714619954, 'model.decoder.layers.19.self_attn.out_proj.weight': 135.37067794799805, 'model.decoder.layers.19.self_attn.out_proj.bias': 1.2105334599812825, 'model.decoder.layers.19.self_attn_layer_norm.weight': 0.1599578062693278, 'model.decoder.layers.19.self_attn_layer_norm.bias': 2.913751403490702, 'model.decoder.layers.19.fc1.weight': 284.32102457682294, 'model.decoder.layers.19.fc1.bias': 1.2599942684173584, 'model.decoder.layers.19.fc2.weight': 4.3566131591796875, 'model.decoder.layers.19.fc2.bias': 3.6627607345581055, 'model.decoder.layers.19.final_layer_norm.weight': 0.6927271286646525, 'model.decoder.layers.19.final_layer_norm.bias': 0.2087437758843104, 'model.decoder.layers.20.self_attn.k_proj.weight': 23.685892740885418, 'model.decoder.layers.20.self_attn.k_proj.bias': 1.1672106362918082e-07, 
'model.decoder.layers.20.self_attn.v_proj.weight': 624.1569519042969, 'model.decoder.layers.20.self_attn.v_proj.bias': 2.149170478185018, 'model.decoder.layers.20.self_attn.q_proj.weight': 110.40034612019856, 'model.decoder.layers.20.self_attn.q_proj.bias': 0.26976368327935535, 'model.decoder.layers.20.self_attn.out_proj.weight': 9.283737182617188, 'model.decoder.layers.20.self_attn.out_proj.bias': 0.5362516244252523, 'model.decoder.layers.20.self_attn_layer_norm.weight': 0.06875685850779216, 'model.decoder.layers.20.self_attn_layer_norm.bias': 4.817276040712993, 'model.decoder.layers.20.fc1.weight': 195.2688191731771, 'model.decoder.layers.20.fc1.bias': 0.41515107949574787, 'model.decoder.layers.20.fc2.weight': 84.10826841990153, 'model.decoder.layers.20.fc2.bias': 3.36245059967041, 'model.decoder.layers.20.final_layer_norm.weight': 0.5196702281634012, 'model.decoder.layers.20.final_layer_norm.bias': 0.7786722580591837, 'model.decoder.layers.21.self_attn.k_proj.weight': 90.89690272013347, 'model.decoder.layers.21.self_attn.k_proj.bias': 2.7367506542456493e-07, 'model.decoder.layers.21.self_attn.v_proj.weight': 447.6248372395833, 'model.decoder.layers.21.self_attn.v_proj.bias': 1.1513359546661377, 'model.decoder.layers.21.self_attn.q_proj.weight': 56.912890116373696, 'model.decoder.layers.21.self_attn.q_proj.bias': 0.3401174147923787, 'model.decoder.layers.21.self_attn.out_proj.weight': 92.01056448618571, 'model.decoder.layers.21.self_attn.out_proj.bias': 2.642144759496053, 'model.decoder.layers.21.self_attn_layer_norm.weight': 0.012430349985758463, 'model.decoder.layers.21.self_attn_layer_norm.bias': 1.4953891436258953, 'model.decoder.layers.21.fc1.weight': 182.3254648844401, 'model.decoder.layers.21.fc1.bias': 0.4217895766099294, 'model.decoder.layers.21.fc2.weight': 48.591383616129555, 'model.decoder.layers.21.fc2.bias': 1.6303943395614624, 'model.decoder.layers.21.final_layer_norm.weight': 0.1133418579896291, 'model.decoder.layers.21.final_layer_norm.bias': 
0.8550681173801422, 'model.decoder.layers.22.self_attn.k_proj.weight': 55.32455571492513, 'model.decoder.layers.22.self_attn.k_proj.bias': 5.932955673415563e-07, 'model.decoder.layers.22.self_attn.v_proj.weight': 260.07740783691406, 'model.decoder.layers.22.self_attn.v_proj.bias': 0.6635967095692953, 'model.decoder.layers.22.self_attn.q_proj.weight': 41.74598375956217, 'model.decoder.layers.22.self_attn.q_proj.bias': 0.7733224692444006, 'model.decoder.layers.22.self_attn.out_proj.weight': 7.186656634012858, 'model.decoder.layers.22.self_attn.out_proj.bias': 2.8045431772867837, 'model.decoder.layers.22.self_attn_layer_norm.weight': 0.10426807403564453, 'model.decoder.layers.22.self_attn_layer_norm.bias': 1.7604074478149414, 'model.decoder.layers.22.fc1.weight': 323.54155985514325, 'model.decoder.layers.22.fc1.bias': 3.094419479370117, 'model.decoder.layers.22.fc2.weight': 125.38926951090495, 'model.decoder.layers.22.fc2.bias': 1.9736011028289795, 'model.decoder.layers.22.final_layer_norm.weight': 3.3347854614257812, 'model.decoder.layers.22.final_layer_norm.bias': 1.4577517807483673, 'model.decoder.layers.23.self_attn.k_proj.weight': 2116.7174072265625, 'model.decoder.layers.23.self_attn.k_proj.bias': 0.0005777936894446611, 'model.decoder.layers.23.self_attn.v_proj.weight': 259.937255859375, 'model.decoder.layers.23.self_attn.v_proj.bias': 2.886035362879435, 'model.decoder.layers.23.self_attn.q_proj.weight': 23.766716639200848, 'model.decoder.layers.23.self_attn.q_proj.bias': 0.33590322732925415, 'model.decoder.layers.23.self_attn.out_proj.weight': 15.94110107421875, 'model.decoder.layers.23.self_attn.out_proj.bias': 2.1962711811065674, 'model.decoder.layers.23.self_attn_layer_norm.weight': 116.557679494222, 'model.decoder.layers.23.self_attn_layer_norm.bias': 1.6408641537030537, 'model.decoder.layers.23.fc1.weight': 361.1596272786458, 'model.decoder.layers.23.fc1.bias': 0.11565979321797688, 'model.decoder.layers.23.fc2.weight': 151.1081339518229, 
'model.decoder.layers.23.fc2.bias': 2.4938165744145713, 'model.decoder.layers.23.final_layer_norm.weight': 4.278749386469523, 'model.decoder.layers.23.final_layer_norm.bias': 2.4786187410354614}\n",
    "sensitivity = [0]*24\n",
    "for name, trace in hessian_trace.items():\n",
    "   if name.startswith(\"model.decoder.layers\"):\n",
    "       layer = int(name.split('.')[3])\n",
    "       sensitivity[layer] += trace\n",
    "print(sensitivity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2f34c939-f4c1-4102-8c73-6b206de79f88",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.024750037265516694, 0.06347911567546533, 0.04707093544770524, 0.04599794041149252, 0.08218989528002929, 0.044483200854823046, 0.06627523331859564, 0.06360966167135373, 0.06675981033964362, 0.028765330516103305, 0.02846526448869291, 0.04828428689844699, 0.04114862483984094, 0.02663871526765149, 0.021715643293125808, 0.017720799965410095, 0.018353715486242095, 0.02634426509151755, 0.00952890520053451, 0.009690598873342227, 0.009973193956266613, 0.007518922651387205, 0.008155811201672525, 0.06589200420512498]\n"
     ]
    }
   ],
   "source": [
    "### \n",
    "hessian_trace = {'model.decoder.embed_tokens.weight': 1.1185309799289643e-05, 'model.decoder.embed_positions.weight': 8.143285607419362e-06, 'model.decoder.final_layer_norm.weight': 0.002161037642508745, 'model.decoder.final_layer_norm.bias': 0.004603024572134018, 'model.decoder.layers.0.self_attn.k_proj.weight': 4.645156877813861e-05, 'model.decoder.layers.0.self_attn.k_proj.bias': 3.031572493039647e-10, 'model.decoder.layers.0.self_attn.v_proj.weight': 1.66144673130475e-05, 'model.decoder.layers.0.self_attn.v_proj.bias': 0.0019134297035634518, 'model.decoder.layers.0.self_attn.q_proj.weight': 2.0651876184274442e-05, 'model.decoder.layers.0.self_attn.q_proj.bias': 0.00010981145896948874, 'model.decoder.layers.0.self_attn.out_proj.weight': 7.775293488521129e-05, 'model.decoder.layers.0.self_attn.out_proj.bias': 0.00035202689468860626, 'model.decoder.layers.0.self_attn_layer_norm.weight': 0.0013620768440887332, 'model.decoder.layers.0.self_attn_layer_norm.bias': 0.007437482010573149, 'model.decoder.layers.0.fc1.weight': 0.00012652759323827922, 'model.decoder.layers.0.fc1.bias': 0.0024492822121828794, 'model.decoder.layers.0.fc2.weight': 2.7213727662456222e-05, 'model.decoder.layers.0.fc2.bias': 0.008369727060198784, 'model.decoder.layers.0.final_layer_norm.weight': 0.001489672577008605, 'model.decoder.layers.0.final_layer_norm.bias': 0.0009513160330243409, 'model.decoder.layers.1.self_attn.k_proj.weight': 6.936030331416987e-06, 'model.decoder.layers.1.self_attn.k_proj.bias': 1.5926332475046934e-10, 'model.decoder.layers.1.self_attn.v_proj.weight': 0.00025346881011500955, 'model.decoder.layers.1.self_attn.v_proj.bias': 0.0021952157840132713, 'model.decoder.layers.1.self_attn.q_proj.weight': 1.818699274735991e-05, 'model.decoder.layers.1.self_attn.q_proj.bias': 0.0004335007688496262, 'model.decoder.layers.1.self_attn.out_proj.weight': 1.7706663129501976e-05, 'model.decoder.layers.1.self_attn.out_proj.bias': 0.010794132947921753, 
'model.decoder.layers.1.self_attn_layer_norm.weight': 0.004621668718755245, 'model.decoder.layers.1.self_attn_layer_norm.bias': 0.006846331059932709, 'model.decoder.layers.1.fc1.weight': 5.2937732107238844e-05, 'model.decoder.layers.1.fc1.bias': 0.0014727330999448895, 'model.decoder.layers.1.fc2.weight': 7.398500201816205e-06, 'model.decoder.layers.1.fc2.bias': 0.03257054463028908, 'model.decoder.layers.1.final_layer_norm.weight': 0.0019106791587546468, 'model.decoder.layers.1.final_layer_norm.bias': 0.0022776746191084385, 'model.decoder.layers.2.self_attn.k_proj.weight': 6.018878593749832e-06, 'model.decoder.layers.2.self_attn.k_proj.bias': 2.9858293615347975e-10, 'model.decoder.layers.2.self_attn.v_proj.weight': 7.255269156303257e-05, 'model.decoder.layers.2.self_attn.v_proj.bias': 0.0014476566575467587, 'model.decoder.layers.2.self_attn.q_proj.weight': 6.528708581754472e-06, 'model.decoder.layers.2.self_attn.q_proj.bias': 0.001365584321320057, 'model.decoder.layers.2.self_attn.out_proj.weight': 1.976549538085237e-05, 'model.decoder.layers.2.self_attn.out_proj.bias': 0.009294230490922928, 'model.decoder.layers.2.self_attn_layer_norm.weight': 0.000535756116732955, 'model.decoder.layers.2.self_attn_layer_norm.bias': 0.001856120303273201, 'model.decoder.layers.2.fc1.weight': 3.177683538524434e-05, 'model.decoder.layers.2.fc1.bias': 2.412831236142665e-05, 'model.decoder.layers.2.fc2.weight': 1.818887085391907e-07, 'model.decoder.layers.2.fc2.bias': 0.022226765751838684, 'model.decoder.layers.2.final_layer_norm.weight': 0.007943467237055302, 'model.decoder.layers.2.final_layer_norm.bias': 0.0022404014598578215, 'model.decoder.layers.3.self_attn.k_proj.weight': 1.151437663793331e-05, 'model.decoder.layers.3.self_attn.k_proj.bias': 3.3940850130420586e-10, 'model.decoder.layers.3.self_attn.v_proj.weight': 4.8154910473385826e-05, 'model.decoder.layers.3.self_attn.v_proj.bias': 0.01699797995388508, 'model.decoder.layers.3.self_attn.q_proj.weight': 2.3673514078836888e-05, 
'model.decoder.layers.3.self_attn.q_proj.bias': 0.0005298053729347885, 'model.decoder.layers.3.self_attn.out_proj.weight': 2.1154875867068768e-05, 'model.decoder.layers.3.self_attn.out_proj.bias': 0.0014614267274737358, 'model.decoder.layers.3.self_attn_layer_norm.weight': 0.005598359275609255, 'model.decoder.layers.3.self_attn_layer_norm.bias': 0.000933143135625869, 'model.decoder.layers.3.fc1.weight': 4.553344115265645e-05, 'model.decoder.layers.3.fc1.bias': 0.0012870586942881346, 'model.decoder.layers.3.fc2.weight': 1.06591141957324e-05, 'model.decoder.layers.3.fc2.bias': 0.012304415926337242, 'model.decoder.layers.3.final_layer_norm.weight': 0.0007680617272853851, 'model.decoder.layers.3.final_layer_norm.bias': 0.005956999026238918, 'model.decoder.layers.4.self_attn.k_proj.weight': 2.699195647437591e-06, 'model.decoder.layers.4.self_attn.k_proj.bias': 7.5040951230676e-10, 'model.decoder.layers.4.self_attn.v_proj.weight': 0.00021728238789364696, 'model.decoder.layers.4.self_attn.v_proj.bias': 0.0034294729121029377, 'model.decoder.layers.4.self_attn.q_proj.weight': 4.4106283894507214e-05, 'model.decoder.layers.4.self_attn.q_proj.bias': 0.0004717133124358952, 'model.decoder.layers.4.self_attn.out_proj.weight': 3.1982865039026365e-05, 'model.decoder.layers.4.self_attn.out_proj.bias': 0.04290454462170601, 'model.decoder.layers.4.self_attn_layer_norm.weight': 0.0024552810937166214, 'model.decoder.layers.4.self_attn_layer_norm.bias': 0.0026056580245494843, 'model.decoder.layers.4.fc1.weight': 3.265196210122667e-05, 'model.decoder.layers.4.fc1.bias': 0.0013350797817111015, 'model.decoder.layers.4.fc2.weight': 6.125213985797018e-06, 'model.decoder.layers.4.fc2.bias': 0.0204729363322258, 'model.decoder.layers.4.final_layer_norm.weight': 0.006313554476946592, 'model.decoder.layers.4.final_layer_norm.bias': 0.0018668060656636953, 'model.decoder.layers.5.self_attn.k_proj.weight': 1.9550689103198238e-05, 'model.decoder.layers.5.self_attn.k_proj.bias': 4.5481396426794163e-10, 
'model.decoder.layers.5.self_attn.v_proj.weight': 0.0002496642409823835, 'model.decoder.layers.5.self_attn.v_proj.bias': 0.002697425428777933, 'model.decoder.layers.5.self_attn.q_proj.weight': 1.265824175789021e-05, 'model.decoder.layers.5.self_attn.q_proj.bias': 0.0002674320712685585, 'model.decoder.layers.5.self_attn.out_proj.weight': 0.00011045326391467825, 'model.decoder.layers.5.self_attn.out_proj.bias': 0.014382628723978996, 'model.decoder.layers.5.self_attn_layer_norm.weight': 0.001466980203986168, 'model.decoder.layers.5.self_attn_layer_norm.bias': 0.0034044026397168636, 'model.decoder.layers.5.fc1.weight': 5.873205282114213e-06, 'model.decoder.layers.5.fc1.bias': 8.20957066025585e-05, 'model.decoder.layers.5.fc2.weight': 4.40867825091118e-06, 'model.decoder.layers.5.fc2.bias': 0.018928296864032745, 'model.decoder.layers.5.final_layer_norm.weight': 0.0011564877349883318, 'model.decoder.layers.5.final_layer_norm.bias': 0.0016948427073657513, 'model.decoder.layers.6.self_attn.k_proj.weight': 1.5418418115586974e-06, 'model.decoder.layers.6.self_attn.k_proj.bias': 2.9045921223769255e-10, 'model.decoder.layers.6.self_attn.v_proj.weight': 0.0007602623663842678, 'model.decoder.layers.6.self_attn.v_proj.bias': 0.011044119484722614, 'model.decoder.layers.6.self_attn.q_proj.weight': 2.2584390535485e-07, 'model.decoder.layers.6.self_attn.q_proj.bias': 0.0008485889993607998, 'model.decoder.layers.6.self_attn.out_proj.weight': 3.578695759642869e-05, 'model.decoder.layers.6.self_attn.out_proj.bias': 0.008568225428462029, 'model.decoder.layers.6.self_attn_layer_norm.weight': 0.00713425362482667, 'model.decoder.layers.6.self_attn_layer_norm.bias': 0.014377156272530556, 'model.decoder.layers.6.fc1.weight': 4.1227933252230287e-05, 'model.decoder.layers.6.fc1.bias': 0.00033436791272833943, 'model.decoder.layers.6.fc2.weight': 6.13882457400905e-06, 'model.decoder.layers.6.fc2.bias': 0.018310433253645897, 'model.decoder.layers.6.final_layer_norm.weight': 0.0007947176927700639, 
'model.decoder.layers.6.final_layer_norm.bias': 0.004018186591565609, 'model.decoder.layers.7.self_attn.k_proj.weight': 1.8655551684787497e-05, 'model.decoder.layers.7.self_attn.k_proj.bias': 4.4433434709389985e-10, 'model.decoder.layers.7.self_attn.v_proj.weight': 0.0004696718533523381, 'model.decoder.layers.7.self_attn.v_proj.bias': 0.010697084479033947, 'model.decoder.layers.7.self_attn.q_proj.weight': 1.0651410775608383e-05, 'model.decoder.layers.7.self_attn.q_proj.bias': 0.0009541820036247373, 'model.decoder.layers.7.self_attn.out_proj.weight': 2.0471183233894408e-05, 'model.decoder.layers.7.self_attn.out_proj.bias': 0.04106755927205086, 'model.decoder.layers.7.self_attn_layer_norm.weight': 0.003637113608419895, 'model.decoder.layers.7.self_attn_layer_norm.bias': 0.003511386923491955, 'model.decoder.layers.7.fc1.weight': 3.541999467415735e-05, 'model.decoder.layers.7.fc1.bias': 0.00017271609976887703, 'model.decoder.layers.7.fc2.weight': 6.586087692994624e-07, 'model.decoder.layers.7.fc2.bias': 0.0007914276793599129, 'model.decoder.layers.7.final_layer_norm.weight': 0.0016061868518590927, 'model.decoder.layers.7.final_layer_norm.bias': 0.0006164757069200277, 'model.decoder.layers.8.self_attn.k_proj.weight': 1.312786935159238e-05, 'model.decoder.layers.8.self_attn.k_proj.bias': 4.789022511886287e-10, 'model.decoder.layers.8.self_attn.v_proj.weight': 0.00083456642460078, 'model.decoder.layers.8.self_attn.v_proj.bias': 0.0013190273893997073, 'model.decoder.layers.8.self_attn.q_proj.weight': 4.527746204985306e-05, 'model.decoder.layers.8.self_attn.q_proj.bias': 0.0003894694964401424, 'model.decoder.layers.8.self_attn.out_proj.weight': 8.155950126820244e-06, 'model.decoder.layers.8.self_attn.out_proj.bias': 0.031470075249671936, 'model.decoder.layers.8.self_attn_layer_norm.weight': 0.0021222857758402824, 'model.decoder.layers.8.self_attn_layer_norm.bias': 0.0019304797751829028, 'model.decoder.layers.8.fc1.weight': 2.3366730601992458e-05, 
'model.decoder.layers.8.fc1.bias': 0.0016706563765183091, 'model.decoder.layers.8.fc2.weight': 1.011752010526834e-05, 'model.decoder.layers.8.fc2.bias': 0.015386030077934265, 'model.decoder.layers.8.final_layer_norm.weight': 0.004658102057874203, 'model.decoder.layers.8.final_layer_norm.bias': 0.006879071705043316, 'model.decoder.layers.9.self_attn.k_proj.weight': 1.7018075595842674e-05, 'model.decoder.layers.9.self_attn.k_proj.bias': 7.391953715796262e-11, 'model.decoder.layers.9.self_attn.v_proj.weight': 0.0006141822086647153, 'model.decoder.layers.9.self_attn.v_proj.bias': 0.004593817517161369, 'model.decoder.layers.9.self_attn.q_proj.weight': 2.1341147657949477e-05, 'model.decoder.layers.9.self_attn.q_proj.bias': 0.0019551655277609825, 'model.decoder.layers.9.self_attn.out_proj.weight': 5.762240107287653e-05, 'model.decoder.layers.9.self_attn.out_proj.bias': 0.008541066199541092, 'model.decoder.layers.9.self_attn_layer_norm.weight': 0.00260370085015893, 'model.decoder.layers.9.self_attn_layer_norm.bias': 0.002429370302706957, 'model.decoder.layers.9.fc1.weight': 1.5558865925413556e-05, 'model.decoder.layers.9.fc1.bias': 0.0016150367446243763, 'model.decoder.layers.9.fc2.weight': 1.7371028661727905e-05, 'model.decoder.layers.9.fc2.bias': 0.0010262508876621723, 'model.decoder.layers.9.final_layer_norm.weight': 0.0006154445582069457, 'model.decoder.layers.9.final_layer_norm.bias': 0.004642384126782417, 'model.decoder.layers.10.self_attn.k_proj.weight': 1.7195801774505526e-05, 'model.decoder.layers.10.self_attn.k_proj.bias': 7.461231632532872e-11, 'model.decoder.layers.10.self_attn.v_proj.weight': 0.0006180479540489614, 'model.decoder.layers.10.self_attn.v_proj.bias': 0.0049537960439920425, 'model.decoder.layers.10.self_attn.q_proj.weight': 1.5819550753803924e-06, 'model.decoder.layers.10.self_attn.q_proj.bias': 0.0015959527809172869, 'model.decoder.layers.10.self_attn.out_proj.weight': 7.227572496049106e-05, 'model.decoder.layers.10.self_attn.out_proj.bias': 
0.0029820981435477734, 'model.decoder.layers.10.self_attn_layer_norm.weight': 0.005492689553648233, 'model.decoder.layers.10.self_attn_layer_norm.bias': 0.0049917567521333694, 'model.decoder.layers.10.fc1.weight': 1.7441607269574888e-06, 'model.decoder.layers.10.fc1.bias': 0.000596581376157701, 'model.decoder.layers.10.fc2.weight': 8.243401680374518e-06, 'model.decoder.layers.10.fc2.bias': 0.0008291625417768955, 'model.decoder.layers.10.final_layer_norm.weight': 0.0004895663587376475, 'model.decoder.layers.10.final_layer_norm.bias': 0.005814571864902973, 'model.decoder.layers.11.self_attn.k_proj.weight': 1.6726547983125784e-05, 'model.decoder.layers.11.self_attn.k_proj.bias': 7.352944919603033e-10, 'model.decoder.layers.11.self_attn.v_proj.weight': 0.0005792105803266168, 'model.decoder.layers.11.self_attn.v_proj.bias': 0.0014454503543674946, 'model.decoder.layers.11.self_attn.q_proj.weight': 1.984708069358021e-05, 'model.decoder.layers.11.self_attn.q_proj.bias': 0.0017196566332131624, 'model.decoder.layers.11.self_attn.out_proj.weight': 7.973126048455015e-05, 'model.decoder.layers.11.self_attn.out_proj.bias': 0.0002526906318962574, 'model.decoder.layers.11.self_attn_layer_norm.weight': 0.0006384402513504028, 'model.decoder.layers.11.self_attn_layer_norm.bias': 0.014260699972510338, 'model.decoder.layers.11.fc1.weight': 3.445312177063897e-05, 'model.decoder.layers.11.fc1.bias': 0.00018588214879855514, 'model.decoder.layers.11.fc2.weight': 2.20234619519033e-06, 'model.decoder.layers.11.fc2.bias': 0.023138443008065224, 'model.decoder.layers.11.final_layer_norm.weight': 0.0038702962920069695, 'model.decoder.layers.11.final_layer_norm.bias': 0.0020405559334903955, 'model.decoder.layers.12.self_attn.k_proj.weight': 5.6471722018613946e-06, 'model.decoder.layers.12.self_attn.k_proj.bias': 5.683409298740116e-10, 'model.decoder.layers.12.self_attn.v_proj.weight': 0.0003623973170761019, 'model.decoder.layers.12.self_attn.v_proj.bias': 0.004810386803001165, 
'model.decoder.layers.12.self_attn.q_proj.weight': 2.4387672965531237e-05, 'model.decoder.layers.12.self_attn.q_proj.bias': 0.0006049377843737602, 'model.decoder.layers.12.self_attn.out_proj.weight': 2.0980100089218467e-05, 'model.decoder.layers.12.self_attn.out_proj.bias': 0.0037242495454847813, 'model.decoder.layers.12.self_attn_layer_norm.weight': 0.0002736416645348072, 'model.decoder.layers.12.self_attn_layer_norm.bias': 0.0108397351577878, 'model.decoder.layers.12.fc1.weight': 4.7448615077883005e-05, 'model.decoder.layers.12.fc1.bias': 0.0008975025848485529, 'model.decoder.layers.12.fc2.weight': 2.871520700864494e-05, 'model.decoder.layers.12.fc2.bias': 0.011479119770228863, 'model.decoder.layers.12.final_layer_norm.weight': 0.0033557522110641003, 'model.decoder.layers.12.final_layer_norm.bias': 0.004673722665756941, 'model.decoder.layers.13.self_attn.k_proj.weight': 0.00011653665569610894, 'model.decoder.layers.13.self_attn.k_proj.bias': 1.7158043874587747e-09, 'model.decoder.layers.13.self_attn.v_proj.weight': 0.0002871536125894636, 'model.decoder.layers.13.self_attn.v_proj.bias': 0.0035168619360774755, 'model.decoder.layers.13.self_attn.q_proj.weight': 4.61002164229285e-05, 'model.decoder.layers.13.self_attn.q_proj.bias': 0.0004209924372844398, 'model.decoder.layers.13.self_attn.out_proj.weight': 4.131090099690482e-05, 'model.decoder.layers.13.self_attn.out_proj.bias': 0.00010073347948491573, 'model.decoder.layers.13.self_attn_layer_norm.weight': 0.003654373809695244, 'model.decoder.layers.13.self_attn_layer_norm.bias': 0.005862885154783726, 'model.decoder.layers.13.fc1.weight': 4.259213892510161e-05, 'model.decoder.layers.13.fc1.bias': 0.00028196259518153965, 'model.decoder.layers.13.fc2.weight': 1.934450892804307e-06, 'model.decoder.layers.13.fc2.bias': 0.006977382116019726, 'model.decoder.layers.13.final_layer_norm.weight': 0.004070748575031757, 'model.decoder.layers.13.final_layer_norm.bias': 0.0012171454727649689, 
'model.decoder.layers.14.self_attn.k_proj.weight': 2.5059925974346697e-05, 'model.decoder.layers.14.self_attn.k_proj.bias': 7.952873914973679e-10, 'model.decoder.layers.14.self_attn.v_proj.weight': 9.57314478000626e-05, 'model.decoder.layers.14.self_attn.v_proj.bias': 0.009776213206350803, 'model.decoder.layers.14.self_attn.q_proj.weight': 9.453647180635016e-06, 'model.decoder.layers.14.self_attn.q_proj.bias': 3.741658292710781e-05, 'model.decoder.layers.14.self_attn.out_proj.weight': 4.4570078898686916e-05, 'model.decoder.layers.14.self_attn.out_proj.bias': 0.004945332184433937, 'model.decoder.layers.14.self_attn_layer_norm.weight': 0.0027952108066529036, 'model.decoder.layers.14.self_attn_layer_norm.bias': 0.0017658316064625978, 'model.decoder.layers.14.fc1.weight': 2.2840002202428877e-05, 'model.decoder.layers.14.fc1.bias': 0.00041145997238345444, 'model.decoder.layers.14.fc2.weight': 9.089013474294916e-06, 'model.decoder.layers.14.fc2.bias': 0.0013153913896530867, 'model.decoder.layers.14.final_layer_norm.weight': 0.00015770667232573032, 'model.decoder.layers.14.final_layer_norm.bias': 0.0003043359611183405, 'model.decoder.layers.15.self_attn.k_proj.weight': 1.691165925876703e-05, 'model.decoder.layers.15.self_attn.k_proj.bias': 1.7975807509174047e-09, 'model.decoder.layers.15.self_attn.v_proj.weight': 6.160035263746977e-05, 'model.decoder.layers.15.self_attn.v_proj.bias': 0.004183325916528702, 'model.decoder.layers.15.self_attn.q_proj.weight': 9.07519097381737e-06, 'model.decoder.layers.15.self_attn.q_proj.bias': 0.001580888987518847, 'model.decoder.layers.15.self_attn.out_proj.weight': 4.506875484366901e-06, 'model.decoder.layers.15.self_attn.out_proj.bias': 0.00036701885983347893, 'model.decoder.layers.15.self_attn_layer_norm.weight': 0.0014215430710464716, 'model.decoder.layers.15.self_attn_layer_norm.bias': 0.000428510713391006, 'model.decoder.layers.15.fc1.weight': 8.200007869163528e-06, 'model.decoder.layers.15.fc1.bias': 0.0004673587391152978, 
'model.decoder.layers.15.fc2.weight': 9.591492016625125e-06, 'model.decoder.layers.15.fc2.bias': 0.008664260618388653, 'model.decoder.layers.15.final_layer_norm.weight': 0.00045877337106503546, 'model.decoder.layers.15.final_layer_norm.bias': 3.923231270164251e-05, 'model.decoder.layers.16.self_attn.k_proj.weight': 8.037301995500457e-06, 'model.decoder.layers.16.self_attn.k_proj.bias': 3.4562219752842793e-10, 'model.decoder.layers.16.self_attn.v_proj.weight': 0.0003743779961951077, 'model.decoder.layers.16.self_attn.v_proj.bias': 0.005785172805190086, 'model.decoder.layers.16.self_attn.q_proj.weight': 2.8230115276528522e-05, 'model.decoder.layers.16.self_attn.q_proj.bias': 0.00019174485350959003, 'model.decoder.layers.16.self_attn.out_proj.weight': 1.0935635145870037e-05, 'model.decoder.layers.16.self_attn.out_proj.bias': 0.003971535246819258, 'model.decoder.layers.16.self_attn_layer_norm.weight': 0.00021049194037914276, 'model.decoder.layers.16.self_attn_layer_norm.bias': 0.0005579772405326366, 'model.decoder.layers.16.fc1.weight': 1.307657021243358e-05, 'model.decoder.layers.16.fc1.bias': 0.00021872477373108268, 'model.decoder.layers.16.fc2.weight': 9.013900125864893e-06, 'model.decoder.layers.16.fc2.bias': 0.006086578592658043, 'model.decoder.layers.16.final_layer_norm.weight': 0.000611298019066453, 'model.decoder.layers.16.final_layer_norm.bias': 0.0002765201497823, 'model.decoder.layers.17.self_attn.k_proj.weight': 2.1650248527294025e-05, 'model.decoder.layers.17.self_attn.k_proj.bias': 1.0046452558754027e-10, 'model.decoder.layers.17.self_attn.v_proj.weight': 0.00011780836939578876, 'model.decoder.layers.17.self_attn.v_proj.bias': 0.009902569465339184, 'model.decoder.layers.17.self_attn.q_proj.weight': 9.811848940444179e-06, 'model.decoder.layers.17.self_attn.q_proj.bias': 0.0010778481373563409, 'model.decoder.layers.17.self_attn.out_proj.weight': 3.1169627618510276e-05, 'model.decoder.layers.17.self_attn.out_proj.bias': 0.0014936437364667654, 
'model.decoder.layers.17.self_attn_layer_norm.weight': 0.0011217626743018627, 'model.decoder.layers.17.self_attn_layer_norm.bias': 0.005995844025164843, 'model.decoder.layers.17.fc1.weight': 1.495193373557413e-05, 'model.decoder.layers.17.fc1.bias': 8.811787120066583e-05, 'model.decoder.layers.17.fc2.weight': 5.135842684467207e-07, 'model.decoder.layers.17.fc2.bias': 0.004266152158379555, 'model.decoder.layers.17.final_layer_norm.weight': 0.0015217175241559744, 'model.decoder.layers.17.final_layer_norm.bias': 0.0006807037862017751, 'model.decoder.layers.18.self_attn.k_proj.weight': 8.06709340395173e-06, 'model.decoder.layers.18.self_attn.k_proj.bias': 2.382538610845586e-10, 'model.decoder.layers.18.self_attn.v_proj.weight': 0.00010729266796261072, 'model.decoder.layers.18.self_attn.v_proj.bias': 0.0002375934855081141, 'model.decoder.layers.18.self_attn.q_proj.weight': 1.3824549569108058e-05, 'model.decoder.layers.18.self_attn.q_proj.bias': 0.0008267344674095511, 'model.decoder.layers.18.self_attn.out_proj.weight': 4.728955536847934e-05, 'model.decoder.layers.18.self_attn.out_proj.bias': 0.00023239030269905925, 'model.decoder.layers.18.self_attn_layer_norm.weight': 0.0017888545989990234, 'model.decoder.layers.18.self_attn_layer_norm.bias': 0.0026490602176636457, 'model.decoder.layers.18.fc1.weight': 2.582679189799819e-05, 'model.decoder.layers.18.fc1.bias': 1.660519774304703e-05, 'model.decoder.layers.18.fc2.weight': 7.1948597906157374e-06, 'model.decoder.layers.18.fc2.bias': 0.0015473762759938836, 'model.decoder.layers.18.final_layer_norm.weight': 9.034632239490747e-05, 'model.decoder.layers.18.final_layer_norm.bias': 0.0019304485758766532, 'model.decoder.layers.19.self_attn.k_proj.weight': 2.0197403500787914e-05, 'model.decoder.layers.19.self_attn.k_proj.bias': 1.303785968076454e-10, 'model.decoder.layers.19.self_attn.v_proj.weight': 0.0002107986219925806, 'model.decoder.layers.19.self_attn.v_proj.bias': 0.0008111624629236758, 
'model.decoder.layers.19.self_attn.q_proj.weight': 1.4097247913014144e-05, 'model.decoder.layers.19.self_attn.q_proj.bias': 4.587244620779529e-05, 'model.decoder.layers.19.self_attn.out_proj.weight': 3.4978158510057256e-05, 'model.decoder.layers.19.self_attn.out_proj.bias': 0.0005899042589589953, 'model.decoder.layers.19.self_attn_layer_norm.weight': 0.0013289607595652342, 'model.decoder.layers.19.self_attn_layer_norm.bias': 0.0023845075629651546, 'model.decoder.layers.19.fc1.weight': 1.2554923159768805e-05, 'model.decoder.layers.19.fc1.bias': 0.0001672703365329653, 'model.decoder.layers.19.fc2.weight': 6.046268026693724e-06, 'model.decoder.layers.19.fc2.bias': 0.002383989281952381, 'model.decoder.layers.19.final_layer_norm.weight': 0.0005191607633605599, 'model.decoder.layers.19.final_layer_norm.bias': 0.0011610982473939657, 'model.decoder.layers.20.self_attn.k_proj.weight': 2.1556634237640537e-05, 'model.decoder.layers.20.self_attn.k_proj.bias': 1.950688499618991e-10, 'model.decoder.layers.20.self_attn.v_proj.weight': 6.871994264656678e-05, 'model.decoder.layers.20.self_attn.v_proj.bias': 0.0016091763973236084, 'model.decoder.layers.20.self_attn.q_proj.weight': 1.1541615094756708e-05, 'model.decoder.layers.20.self_attn.q_proj.bias': 0.0002420599339529872, 'model.decoder.layers.20.self_attn.out_proj.weight': 6.485556696134154e-06, 'model.decoder.layers.20.self_attn.out_proj.bias': 0.0017362730577588081, 'model.decoder.layers.20.self_attn_layer_norm.weight': 0.000966782565228641, 'model.decoder.layers.20.self_attn_layer_norm.bias': 0.0024521793238818645, 'model.decoder.layers.20.fc1.weight': 2.220020178356208e-06, 'model.decoder.layers.20.fc1.bias': 0.00023903901455923915, 'model.decoder.layers.20.fc2.weight': 5.11652751811198e-06, 'model.decoder.layers.20.fc2.bias': 0.001610452076420188, 'model.decoder.layers.20.final_layer_norm.weight': 0.0006391120259650052, 'model.decoder.layers.20.final_layer_norm.bias': 0.00036247906973585486, 
'model.decoder.layers.21.self_attn.k_proj.weight': 2.875465725082904e-06, 'model.decoder.layers.21.self_attn.k_proj.bias': 1.0963829844001793e-10, 'model.decoder.layers.21.self_attn.v_proj.weight': 0.0001076810949598439, 'model.decoder.layers.21.self_attn.v_proj.bias': 0.0012785817962139845, 'model.decoder.layers.21.self_attn.q_proj.weight': 5.267726010060869e-06, 'model.decoder.layers.21.self_attn.q_proj.bias': 0.0005636128480546176, 'model.decoder.layers.21.self_attn.out_proj.weight': 1.0467017091286834e-05, 'model.decoder.layers.21.self_attn.out_proj.bias': 0.0006349491886794567, 'model.decoder.layers.21.self_attn_layer_norm.weight': 9.788910392671824e-05, 'model.decoder.layers.21.self_attn_layer_norm.bias': 0.0018109140219166875, 'model.decoder.layers.21.fc1.weight': 1.0524420758883934e-05, 'model.decoder.layers.21.fc1.bias': 0.0001395009458065033, 'model.decoder.layers.21.fc2.weight': 7.825683496776037e-07, 'model.decoder.layers.21.fc2.bias': 0.00027287568082101643, 'model.decoder.layers.21.final_layer_norm.weight': 0.002189337508752942, 'model.decoder.layers.21.final_layer_norm.bias': 0.0003936631546821445, 'model.decoder.layers.22.self_attn.k_proj.weight': 7.157395884860307e-06, 'model.decoder.layers.22.self_attn.k_proj.bias': 1.2284405714879654e-09, 'model.decoder.layers.22.self_attn.v_proj.weight': 0.0001079331268556416, 'model.decoder.layers.22.self_attn.v_proj.bias': 0.0024641603231430054, 'model.decoder.layers.22.self_attn.q_proj.weight': 2.9348946554819122e-05, 'model.decoder.layers.22.self_attn.q_proj.bias': 0.00046467551146633923, 'model.decoder.layers.22.self_attn.out_proj.weight': 1.1630811059148982e-05, 'model.decoder.layers.22.self_attn.out_proj.bias': 1.3318858691491187e-05, 'model.decoder.layers.22.self_attn_layer_norm.weight': 0.0024139797315001488, 'model.decoder.layers.22.self_attn_layer_norm.bias': 0.0008520184201188385, 'model.decoder.layers.22.fc1.weight': 1.299438372370787e-05, 'model.decoder.layers.22.fc1.bias': 6.156826566439122e-05, 
'model.decoder.layers.22.fc2.weight': 6.837515229562996e-06, 'model.decoder.layers.22.fc2.bias': 0.000750359205994755, 'model.decoder.layers.22.final_layer_norm.weight': 0.0005745317903347313, 'model.decoder.layers.22.final_layer_norm.bias': 0.00038529568701051176, 'model.decoder.layers.23.self_attn.k_proj.weight': 0.0008154936949722469, 'model.decoder.layers.23.self_attn.k_proj.bias': 1.5340060599555727e-07, 'model.decoder.layers.23.self_attn.v_proj.weight': 1.653862273087725e-05, 'model.decoder.layers.23.self_attn.v_proj.bias': 0.0008790317224338651, 'model.decoder.layers.23.self_attn.q_proj.weight': 9.975660759664606e-06, 'model.decoder.layers.23.self_attn.q_proj.bias': 0.0004680745187215507, 'model.decoder.layers.23.self_attn.out_proj.weight': 2.2089188860263675e-06, 'model.decoder.layers.23.self_attn.out_proj.bias': 0.00054021121468395, 'model.decoder.layers.23.self_attn_layer_norm.weight': 0.05621916800737381, 'model.decoder.layers.23.self_attn_layer_norm.bias': 0.0024355368223041296, 'model.decoder.layers.23.fc1.weight': 1.85953795153182e-05, 'model.decoder.layers.23.fc1.bias': 9.960689203580841e-05, 'model.decoder.layers.23.fc2.weight': 1.5280495517799864e-06, 'model.decoder.layers.23.fc2.bias': 0.0008676279685460031, 'model.decoder.layers.23.final_layer_norm.weight': 0.0021101871971040964, 'model.decoder.layers.23.final_layer_norm.bias': 0.0014080661348998547}\n",
    "#hessian_trace = {'model.decoder.embed_tokens.weight': 1194.9275919596355, 'model.decoder.embed_positions.weight': 62.206461588541664, 'model.decoder.final_layer_norm.weight': 9.23440663019816, 'model.decoder.final_layer_norm.bias': 3.905731995900472, 'model.decoder.layers.0.self_attn.k_proj.weight': 80.81728871663411, 'model.decoder.layers.0.self_attn.k_proj.bias': 4.4644428953688475e-07, 'model.decoder.layers.0.self_attn.v_proj.weight': 199.46858723958334, 'model.decoder.layers.0.self_attn.v_proj.bias': 16.865546941757202, 'model.decoder.layers.0.self_attn.q_proj.weight': 122.04016367594402, 'model.decoder.layers.0.self_attn.q_proj.bias': 1.4802623589833577, 'model.decoder.layers.0.self_attn.out_proj.weight': 332.82420857747394, 'model.decoder.layers.0.self_attn.out_proj.bias': 86.57215690612793, 'model.decoder.layers.0.self_attn_layer_norm.weight': 1.176854819059372, 'model.decoder.layers.0.self_attn_layer_norm.bias': 3.8826220830281577, 'model.decoder.layers.0.fc1.weight': 1026.0242919921875, 'model.decoder.layers.0.fc1.bias': 5.837026913960774, 'model.decoder.layers.0.fc2.weight': 434.50232950846356, 'model.decoder.layers.0.fc2.bias': 10.715811093648275, 'model.decoder.layers.0.final_layer_norm.weight': 1.1563409169514973, 'model.decoder.layers.0.final_layer_norm.bias': 0.8455956776936849, 'model.decoder.layers.1.self_attn.k_proj.weight': 9.219987392425537, 'model.decoder.layers.1.self_attn.k_proj.bias': 1.667769045828512e-07, 'model.decoder.layers.1.self_attn.v_proj.weight': 1181.0137125651042, 'model.decoder.layers.1.self_attn.v_proj.bias': 7.343598206837972, 'model.decoder.layers.1.self_attn.q_proj.weight': 13.207474072774252, 'model.decoder.layers.1.self_attn.q_proj.bias': 1.038723607858022, 'model.decoder.layers.1.self_attn.out_proj.weight': 23.346385955810547, 'model.decoder.layers.1.self_attn.out_proj.bias': 25.51119613647461, 'model.decoder.layers.1.self_attn_layer_norm.weight': 1.7841885884602864, 
'model.decoder.layers.1.self_attn_layer_norm.bias': 0.32044434547424316, 'model.decoder.layers.1.fc1.weight': 496.04359944661456, 'model.decoder.layers.1.fc1.bias': 4.226608117421468, 'model.decoder.layers.1.fc2.weight': 82.67283121744792, 'model.decoder.layers.1.fc2.bias': 4.699393590291341, 'model.decoder.layers.1.final_layer_norm.weight': 0.5279660224914551, 'model.decoder.layers.1.final_layer_norm.bias': 2.80998166402181, 'model.decoder.layers.2.self_attn.k_proj.weight': 50.17959785461426, 'model.decoder.layers.2.self_attn.k_proj.bias': 8.968943348008906e-07, 'model.decoder.layers.2.self_attn.v_proj.weight': 406.1774190266927, 'model.decoder.layers.2.self_attn.v_proj.bias': 34.865872065226235, 'model.decoder.layers.2.self_attn.q_proj.weight': 73.74266052246094, 'model.decoder.layers.2.self_attn.q_proj.bias': 0.4287129243214925, 'model.decoder.layers.2.self_attn.out_proj.weight': 159.37913386027017, 'model.decoder.layers.2.self_attn.out_proj.bias': 6.17769734064738, 'model.decoder.layers.2.self_attn_layer_norm.weight': 3.1265578269958496, 'model.decoder.layers.2.self_attn_layer_norm.bias': 1.1245239575703938, 'model.decoder.layers.2.fc1.weight': 1249.8432515462239, 'model.decoder.layers.2.fc1.bias': 17.075202624003094, 'model.decoder.layers.2.fc2.weight': 95.0023930867513, 'model.decoder.layers.2.fc2.bias': 8.266383488972982, 'model.decoder.layers.2.final_layer_norm.weight': 0.3421815236409505, 'model.decoder.layers.2.final_layer_norm.bias': 4.646793524424235, 'model.decoder.layers.3.self_attn.k_proj.weight': 31.99148464202881, 'model.decoder.layers.3.self_attn.k_proj.bias': 5.800834514957387e-07, 'model.decoder.layers.3.self_attn.v_proj.weight': 1177.5484822591145, 'model.decoder.layers.3.self_attn.v_proj.bias': 12.81634553273519, 'model.decoder.layers.3.self_attn.q_proj.weight': 159.7560297648112, 'model.decoder.layers.3.self_attn.q_proj.bias': 1.7579676707585652, 'model.decoder.layers.3.self_attn.out_proj.weight': 30.28198496500651, 
'model.decoder.layers.3.self_attn.out_proj.bias': 4.435482978820801, 'model.decoder.layers.3.self_attn_layer_norm.weight': 13.2084059715271, 'model.decoder.layers.3.self_attn_layer_norm.bias': 4.749482790629069, 'model.decoder.layers.3.fc1.weight': 217.31351216634116, 'model.decoder.layers.3.fc1.bias': 1.0110915501912434, 'model.decoder.layers.3.fc2.weight': 26.99854024251302, 'model.decoder.layers.3.fc2.bias': 31.778201421101887, 'model.decoder.layers.3.final_layer_norm.weight': 21.061067899068195, 'model.decoder.layers.3.final_layer_norm.bias': 2.965498447418213, 'model.decoder.layers.4.self_attn.k_proj.weight': 80.69611740112305, 'model.decoder.layers.4.self_attn.k_proj.bias': 1.0404449615937967e-06, 'model.decoder.layers.4.self_attn.v_proj.weight': 1757.7148030598958, 'model.decoder.layers.4.self_attn.v_proj.bias': 12.15992546081543, 'model.decoder.layers.4.self_attn.q_proj.weight': 120.78164800008138, 'model.decoder.layers.4.self_attn.q_proj.bias': 0.05103441079457601, 'model.decoder.layers.4.self_attn.out_proj.weight': 81.84792073567708, 'model.decoder.layers.4.self_attn.out_proj.bias': 5.4139251708984375, 'model.decoder.layers.4.self_attn_layer_norm.weight': 5.0145918528238935, 'model.decoder.layers.4.self_attn_layer_norm.bias': 6.521610577901204, 'model.decoder.layers.4.fc1.weight': 588.6539815266927, 'model.decoder.layers.4.fc1.bias': 7.932360013326009, 'model.decoder.layers.4.fc2.weight': 41.35539881388346, 'model.decoder.layers.4.fc2.bias': 26.083303451538086, 'model.decoder.layers.4.final_layer_norm.weight': 1.0110756556193035, 'model.decoder.layers.4.final_layer_norm.bias': 10.816126108169556, 'model.decoder.layers.5.self_attn.k_proj.weight': 23.25175412495931, 'model.decoder.layers.5.self_attn.k_proj.bias': 4.583553163683973e-07, 'model.decoder.layers.5.self_attn.v_proj.weight': 977.9918212890625, 'model.decoder.layers.5.self_attn.v_proj.bias': 19.987658818562824, 'model.decoder.layers.5.self_attn.q_proj.weight': 117.74191029866536, 
'model.decoder.layers.5.self_attn.q_proj.bias': 1.2908051510651906, 'model.decoder.layers.5.self_attn.out_proj.weight': 72.65645345052083, 'model.decoder.layers.5.self_attn.out_proj.bias': 23.379069010416668, 'model.decoder.layers.5.self_attn_layer_norm.weight': 3.5170437494913735, 'model.decoder.layers.5.self_attn_layer_norm.bias': 16.186225255330402, 'model.decoder.layers.5.fc1.weight': 11.558486938476562, 'model.decoder.layers.5.fc1.bias': 3.054628372192383, 'model.decoder.layers.5.fc2.weight': 104.43958695729573, 'model.decoder.layers.5.fc2.bias': 39.510135650634766, 'model.decoder.layers.5.final_layer_norm.weight': 0.47280871868133545, 'model.decoder.layers.5.final_layer_norm.bias': 6.2945131460825605, 'model.decoder.layers.6.self_attn.k_proj.weight': 9.65622329711914, 'model.decoder.layers.6.self_attn.k_proj.bias': 1.539022681148102e-07, 'model.decoder.layers.6.self_attn.v_proj.weight': 3969.3793131510415, 'model.decoder.layers.6.self_attn.v_proj.bias': 0.15558862686157227, 'model.decoder.layers.6.self_attn.q_proj.weight': 33.485870361328125, 'model.decoder.layers.6.self_attn.q_proj.bias': 1.1183207035064697, 'model.decoder.layers.6.self_attn.out_proj.weight': 211.32996877034506, 'model.decoder.layers.6.self_attn.out_proj.bias': 32.47879123687744, 'model.decoder.layers.6.self_attn_layer_norm.weight': 7.60649045308431, 'model.decoder.layers.6.self_attn_layer_norm.bias': 2.608099619547526, 'model.decoder.layers.6.fc1.weight': 295.0924479166667, 'model.decoder.layers.6.fc1.bias': 0.5748985211054484, 'model.decoder.layers.6.fc2.weight': 130.67133585611978, 'model.decoder.layers.6.fc2.bias': 57.66433970133463, 'model.decoder.layers.6.final_layer_norm.weight': 1.0882696310679119, 'model.decoder.layers.6.final_layer_norm.bias': 1.008333683013916, 'model.decoder.layers.7.self_attn.k_proj.weight': 11.613513310750326, 'model.decoder.layers.7.self_attn.k_proj.bias': 5.06440301251132e-07, 'model.decoder.layers.7.self_attn.v_proj.weight': 3297.1876627604165, 
'model.decoder.layers.7.self_attn.v_proj.bias': 12.69955571492513, 'model.decoder.layers.7.self_attn.q_proj.weight': 112.87678782145183, 'model.decoder.layers.7.self_attn.q_proj.bias': 0.9876437584559122, 'model.decoder.layers.7.self_attn.out_proj.weight': 218.1019083658854, 'model.decoder.layers.7.self_attn.out_proj.bias': 42.892110188802086, 'model.decoder.layers.7.self_attn_layer_norm.weight': 15.2481476465861, 'model.decoder.layers.7.self_attn_layer_norm.bias': 1.5999202728271484, 'model.decoder.layers.7.fc1.weight': 589.8232116699219, 'model.decoder.layers.7.fc1.bias': 2.181494394938151, 'model.decoder.layers.7.fc2.weight': 126.20874532063802, 'model.decoder.layers.7.fc2.bias': 13.879932085673014, 'model.decoder.layers.7.final_layer_norm.weight': 2.940955638885498, 'model.decoder.layers.7.final_layer_norm.bias': 0.084258238474528, 'model.decoder.layers.8.self_attn.k_proj.weight': 100.79911295572917, 'model.decoder.layers.8.self_attn.k_proj.bias': 6.637437763856724e-07, 'model.decoder.layers.8.self_attn.v_proj.weight': 3215.857666015625, 'model.decoder.layers.8.self_attn.v_proj.bias': 10.213244756062826, 'model.decoder.layers.8.self_attn.q_proj.weight': 0.3197813034057617, 'model.decoder.layers.8.self_attn.q_proj.bias': 1.476491649945577, 'model.decoder.layers.8.self_attn.out_proj.weight': 85.60840352376302, 'model.decoder.layers.8.self_attn.out_proj.bias': 29.13232167561849, 'model.decoder.layers.8.self_attn_layer_norm.weight': 14.836044152577719, 'model.decoder.layers.8.self_attn_layer_norm.bias': 8.231045087178549, 'model.decoder.layers.8.fc1.weight': 1029.8172200520833, 'model.decoder.layers.8.fc1.bias': 14.799010515213013, 'model.decoder.layers.8.fc2.weight': 140.59540557861328, 'model.decoder.layers.8.fc2.bias': 27.155534426371258, 'model.decoder.layers.8.final_layer_norm.weight': 3.5487560033798218, 'model.decoder.layers.8.final_layer_norm.bias': 5.694772958755493, 'model.decoder.layers.9.self_attn.k_proj.weight': 20.521867752075195, 
'model.decoder.layers.9.self_attn.k_proj.bias': 5.125757525092922e-07, 'model.decoder.layers.9.self_attn.v_proj.weight': 3243.8319498697915, 'model.decoder.layers.9.self_attn.v_proj.bias': 26.176570892333984, 'model.decoder.layers.9.self_attn.q_proj.weight': 19.8382085164388, 'model.decoder.layers.9.self_attn.q_proj.bias': 0.5916710992654165, 'model.decoder.layers.9.self_attn.out_proj.weight': 323.1460316975911, 'model.decoder.layers.9.self_attn.out_proj.bias': 28.43972905476888, 'model.decoder.layers.9.self_attn_layer_norm.weight': 4.2964372634887695, 'model.decoder.layers.9.self_attn_layer_norm.bias': 19.154499371846516, 'model.decoder.layers.9.fc1.weight': 325.1955108642578, 'model.decoder.layers.9.fc1.bias': 16.9618345896403, 'model.decoder.layers.9.fc2.weight': 36.228437741597496, 'model.decoder.layers.9.fc2.bias': 10.317483266194662, 'model.decoder.layers.9.final_layer_norm.weight': 5.6070036093393965, 'model.decoder.layers.9.final_layer_norm.bias': 4.4560166994730634, 'model.decoder.layers.10.self_attn.k_proj.weight': 40.140647888183594, 'model.decoder.layers.10.self_attn.k_proj.bias': 1.2212367437314242e-06, 'model.decoder.layers.10.self_attn.v_proj.weight': 3635.5814615885415, 'model.decoder.layers.10.self_attn.v_proj.bias': 4.02281379699707, 'model.decoder.layers.10.self_attn.q_proj.weight': 87.16917165120442, 'model.decoder.layers.10.self_attn.q_proj.bias': 0.6868895689646403, 'model.decoder.layers.10.self_attn.out_proj.weight': 40.987963358561196, 'model.decoder.layers.10.self_attn.out_proj.bias': 32.79821650187174, 'model.decoder.layers.10.self_attn_layer_norm.weight': 1.8764712015787761, 'model.decoder.layers.10.self_attn_layer_norm.bias': 9.303855578104654, 'model.decoder.layers.10.fc1.weight': 278.33945719401044, 'model.decoder.layers.10.fc1.bias': 2.5051854451497397, 'model.decoder.layers.10.fc2.weight': 83.3369852701823, 'model.decoder.layers.10.fc2.bias': 14.743237813313803, 'model.decoder.layers.10.final_layer_norm.weight': 3.912165323893229, 
'model.decoder.layers.10.final_layer_norm.bias': 2.3821582794189453, 'model.decoder.layers.11.self_attn.k_proj.weight': 133.46747334798178, 'model.decoder.layers.11.self_attn.k_proj.bias': 4.210044911208873e-07, 'model.decoder.layers.11.self_attn.v_proj.weight': 3056.7123209635415, 'model.decoder.layers.11.self_attn.v_proj.bias': 8.785794576009115, 'model.decoder.layers.11.self_attn.q_proj.weight': 238.0325164794922, 'model.decoder.layers.11.self_attn.q_proj.bias': 1.6419847806294758, 'model.decoder.layers.11.self_attn.out_proj.weight': 260.1416371663411, 'model.decoder.layers.11.self_attn.out_proj.bias': 25.442824681599934, 'model.decoder.layers.11.self_attn_layer_norm.weight': 4.364767074584961, 'model.decoder.layers.11.self_attn_layer_norm.bias': 3.1743253072102866, 'model.decoder.layers.11.fc1.weight': 611.6233622233073, 'model.decoder.layers.11.fc1.bias': 3.2861900329589844, 'model.decoder.layers.11.fc2.weight': 182.40909322102866, 'model.decoder.layers.11.fc2.bias': 12.629422346750895, 'model.decoder.layers.11.final_layer_norm.weight': 1.7137949069341023, 'model.decoder.layers.11.final_layer_norm.bias': 7.593117078145345, 'model.decoder.layers.12.self_attn.k_proj.weight': 114.80877431233723, 'model.decoder.layers.12.self_attn.k_proj.bias': 1.3701719581149518e-07, 'model.decoder.layers.12.self_attn.v_proj.weight': 1478.7087809244792, 'model.decoder.layers.12.self_attn.v_proj.bias': 14.836765925089518, 'model.decoder.layers.12.self_attn.q_proj.weight': 48.580858866373696, 'model.decoder.layers.12.self_attn.q_proj.bias': 1.3795514702796936, 'model.decoder.layers.12.self_attn.out_proj.weight': 204.7934595743815, 'model.decoder.layers.12.self_attn.out_proj.bias': 16.134981791178387, 'model.decoder.layers.12.self_attn_layer_norm.weight': 1.0600868860880535, 'model.decoder.layers.12.self_attn_layer_norm.bias': 4.9434904257456465, 'model.decoder.layers.12.fc1.weight': 399.1716817220052, 'model.decoder.layers.12.fc1.bias': 2.676555315653483, 
'model.decoder.layers.12.fc2.weight': 178.97567240397134, 'model.decoder.layers.12.fc2.bias': 8.133237838745117, 'model.decoder.layers.12.final_layer_norm.weight': 2.2795286178588867, 'model.decoder.layers.12.final_layer_norm.bias': 0.9722844759623209, 'model.decoder.layers.13.self_attn.k_proj.weight': 122.33266957600911, 'model.decoder.layers.13.self_attn.k_proj.bias': 5.167579123129448e-07, 'model.decoder.layers.13.self_attn.v_proj.weight': 2155.754638671875, 'model.decoder.layers.13.self_attn.v_proj.bias': 2.5755065282185874, 'model.decoder.layers.13.self_attn.q_proj.weight': 130.72259012858072, 'model.decoder.layers.13.self_attn.q_proj.bias': 0.0631372481584549, 'model.decoder.layers.13.self_attn.out_proj.weight': 232.62322998046875, 'model.decoder.layers.13.self_attn.out_proj.bias': 6.0946706136067705, 'model.decoder.layers.13.self_attn_layer_norm.weight': 1.5373609066009521, 'model.decoder.layers.13.self_attn_layer_norm.bias': 5.817979653676351, 'model.decoder.layers.13.fc1.weight': 235.78799057006836, 'model.decoder.layers.13.fc1.bias': 7.012344042460124, 'model.decoder.layers.13.fc2.weight': 88.86114565531413, 'model.decoder.layers.13.fc2.bias': 8.57457160949707, 'model.decoder.layers.13.final_layer_norm.weight': 3.601138432820638, 'model.decoder.layers.13.final_layer_norm.bias': 0.26936769485473633, 'model.decoder.layers.14.self_attn.k_proj.weight': 58.549051920572914, 'model.decoder.layers.14.self_attn.k_proj.bias': 1.050436064057673e-07, 'model.decoder.layers.14.self_attn.v_proj.weight': 1588.015116373698, 'model.decoder.layers.14.self_attn.v_proj.bias': 1.025929609934489, 'model.decoder.layers.14.self_attn.q_proj.weight': 195.23397318522134, 'model.decoder.layers.14.self_attn.q_proj.bias': 0.5062949657440186, 'model.decoder.layers.14.self_attn.out_proj.weight': 189.880859375, 'model.decoder.layers.14.self_attn.out_proj.bias': 24.7160218556722, 'model.decoder.layers.14.self_attn_layer_norm.weight': 0.4795061747233073, 
'model.decoder.layers.14.self_attn_layer_norm.bias': 7.3194325764973955, 'model.decoder.layers.14.fc1.weight': 237.11504618326822, 'model.decoder.layers.14.fc1.bias': 0.6288999716440836, 'model.decoder.layers.14.fc2.weight': 300.7984568277995, 'model.decoder.layers.14.fc2.bias': 12.473860263824463, 'model.decoder.layers.14.final_layer_norm.weight': 5.3096497853597, 'model.decoder.layers.14.final_layer_norm.bias': 3.44138240814209, 'model.decoder.layers.15.self_attn.k_proj.weight': 89.43447367350261, 'model.decoder.layers.15.self_attn.k_proj.bias': 2.0174735861170725e-06, 'model.decoder.layers.15.self_attn.v_proj.weight': 976.4321492513021, 'model.decoder.layers.15.self_attn.v_proj.bias': 7.8637111981709795, 'model.decoder.layers.15.self_attn.q_proj.weight': 40.05120849609375, 'model.decoder.layers.15.self_attn.q_proj.bias': 1.86562579870224, 'model.decoder.layers.15.self_attn.out_proj.weight': 91.47459411621094, 'model.decoder.layers.15.self_attn.out_proj.bias': 9.201977014541626, 'model.decoder.layers.15.self_attn_layer_norm.weight': 8.13798983891805, 'model.decoder.layers.15.self_attn_layer_norm.bias': 4.080739339192708, 'model.decoder.layers.15.fc1.weight': 357.3152160644531, 'model.decoder.layers.15.fc1.bias': 2.50886599222819, 'model.decoder.layers.15.fc2.weight': 75.35965347290039, 'model.decoder.layers.15.fc2.bias': 1.269438107808431, 'model.decoder.layers.15.final_layer_norm.weight': 0.22304606437683105, 'model.decoder.layers.15.final_layer_norm.bias': 2.5818015336990356, 'model.decoder.layers.16.self_attn.k_proj.weight': 76.5325101216634, 'model.decoder.layers.16.self_attn.k_proj.bias': 2.3469116664879644e-07, 'model.decoder.layers.16.self_attn.v_proj.weight': 1183.2902018229167, 'model.decoder.layers.16.self_attn.v_proj.bias': 2.2639951705932617, 'model.decoder.layers.16.self_attn.q_proj.weight': 10.976270039876303, 'model.decoder.layers.16.self_attn.q_proj.bias': 0.7257786691188812, 'model.decoder.layers.16.self_attn.out_proj.weight': 214.39805603027344, 
'model.decoder.layers.16.self_attn.out_proj.bias': 2.0170888900756836, 'model.decoder.layers.16.self_attn_layer_norm.weight': 3.77446182568868, 'model.decoder.layers.16.self_attn_layer_norm.bias': 6.8916622797648115, 'model.decoder.layers.16.fc1.weight': 312.0342610677083, 'model.decoder.layers.16.fc1.bias': 2.356794555981954, 'model.decoder.layers.16.fc2.weight': 173.55509440104166, 'model.decoder.layers.16.fc2.bias': 3.8546457290649414, 'model.decoder.layers.16.final_layer_norm.weight': 1.478039264678955, 'model.decoder.layers.16.final_layer_norm.bias': 2.4736698865890503, 'model.decoder.layers.17.self_attn.k_proj.weight': 74.0786501566569, 'model.decoder.layers.17.self_attn.k_proj.bias': 1.765959799134483e-07, 'model.decoder.layers.17.self_attn.v_proj.weight': 672.512196858724, 'model.decoder.layers.17.self_attn.v_proj.bias': 3.420607646306356, 'model.decoder.layers.17.self_attn.q_proj.weight': 135.22151947021484, 'model.decoder.layers.17.self_attn.q_proj.bias': 0.22574500242869058, 'model.decoder.layers.17.self_attn.out_proj.weight': 165.94742329915366, 'model.decoder.layers.17.self_attn.out_proj.bias': 6.086919943491618, 'model.decoder.layers.17.self_attn_layer_norm.weight': 1.4737507502237956, 'model.decoder.layers.17.self_attn_layer_norm.bias': 0.47539273897806805, 'model.decoder.layers.17.fc1.weight': 202.49539184570312, 'model.decoder.layers.17.fc1.bias': 2.116861661275228, 'model.decoder.layers.17.fc2.weight': 47.12660471598307, 'model.decoder.layers.17.fc2.bias': 5.959351936976115, 'model.decoder.layers.17.final_layer_norm.weight': 0.7635479966799418, 'model.decoder.layers.17.final_layer_norm.bias': 0.5586413145065308, 'model.decoder.layers.18.self_attn.k_proj.weight': 13.660836537679037, 'model.decoder.layers.18.self_attn.k_proj.bias': 7.450204672447095e-07, 'model.decoder.layers.18.self_attn.v_proj.weight': 583.4378763834635, 'model.decoder.layers.18.self_attn.v_proj.bias': 0.2318133513132731, 'model.decoder.layers.18.self_attn.q_proj.weight': 
56.271016438802086, 'model.decoder.layers.18.self_attn.q_proj.bias': 0.44491873184839886, 'model.decoder.layers.18.self_attn.out_proj.weight': 28.96947987874349, 'model.decoder.layers.18.self_attn.out_proj.bias': 1.8864336013793945, 'model.decoder.layers.18.self_attn_layer_norm.weight': 5.648584405581157, 'model.decoder.layers.18.self_attn_layer_norm.bias': 1.0241370995839436, 'model.decoder.layers.18.fc1.weight': 82.8545633951823, 'model.decoder.layers.18.fc1.bias': 1.1417232354482014, 'model.decoder.layers.18.fc2.weight': 67.36198616027832, 'model.decoder.layers.18.fc2.bias': 0.23440690835316977, 'model.decoder.layers.18.final_layer_norm.weight': 0.851794163386027, 'model.decoder.layers.18.final_layer_norm.bias': 0.3095352252324422, 'model.decoder.layers.19.self_attn.k_proj.weight': 38.4573860168457, 'model.decoder.layers.19.self_attn.k_proj.bias': 5.608417268376797e-07, 'model.decoder.layers.19.self_attn.v_proj.weight': 577.9170532226562, 'model.decoder.layers.19.self_attn.v_proj.bias': 2.1673796971639, 'model.decoder.layers.19.self_attn.q_proj.weight': 41.061418533325195, 'model.decoder.layers.19.self_attn.q_proj.bias': 0.3846500714619954, 'model.decoder.layers.19.self_attn.out_proj.weight': 135.37067794799805, 'model.decoder.layers.19.self_attn.out_proj.bias': 1.2105334599812825, 'model.decoder.layers.19.self_attn_layer_norm.weight': 0.1599578062693278, 'model.decoder.layers.19.self_attn_layer_norm.bias': 2.913751403490702, 'model.decoder.layers.19.fc1.weight': 284.32102457682294, 'model.decoder.layers.19.fc1.bias': 1.2599942684173584, 'model.decoder.layers.19.fc2.weight': 4.3566131591796875, 'model.decoder.layers.19.fc2.bias': 3.6627607345581055, 'model.decoder.layers.19.final_layer_norm.weight': 0.6927271286646525, 'model.decoder.layers.19.final_layer_norm.bias': 0.2087437758843104, 'model.decoder.layers.20.self_attn.k_proj.weight': 23.685892740885418, 'model.decoder.layers.20.self_attn.k_proj.bias': 1.1672106362918082e-07, 
'model.decoder.layers.20.self_attn.v_proj.weight': 624.1569519042969, 'model.decoder.layers.20.self_attn.v_proj.bias': 2.149170478185018, 'model.decoder.layers.20.self_attn.q_proj.weight': 110.40034612019856, 'model.decoder.layers.20.self_attn.q_proj.bias': 0.26976368327935535, 'model.decoder.layers.20.self_attn.out_proj.weight': 9.283737182617188, 'model.decoder.layers.20.self_attn.out_proj.bias': 0.5362516244252523, 'model.decoder.layers.20.self_attn_layer_norm.weight': 0.06875685850779216, 'model.decoder.layers.20.self_attn_layer_norm.bias': 4.817276040712993, 'model.decoder.layers.20.fc1.weight': 195.2688191731771, 'model.decoder.layers.20.fc1.bias': 0.41515107949574787, 'model.decoder.layers.20.fc2.weight': 84.10826841990153, 'model.decoder.layers.20.fc2.bias': 3.36245059967041, 'model.decoder.layers.20.final_layer_norm.weight': 0.5196702281634012, 'model.decoder.layers.20.final_layer_norm.bias': 0.7786722580591837, 'model.decoder.layers.21.self_attn.k_proj.weight': 90.89690272013347, 'model.decoder.layers.21.self_attn.k_proj.bias': 2.7367506542456493e-07, 'model.decoder.layers.21.self_attn.v_proj.weight': 447.6248372395833, 'model.decoder.layers.21.self_attn.v_proj.bias': 1.1513359546661377, 'model.decoder.layers.21.self_attn.q_proj.weight': 56.912890116373696, 'model.decoder.layers.21.self_attn.q_proj.bias': 0.3401174147923787, 'model.decoder.layers.21.self_attn.out_proj.weight': 92.01056448618571, 'model.decoder.layers.21.self_attn.out_proj.bias': 2.642144759496053, 'model.decoder.layers.21.self_attn_layer_norm.weight': 0.012430349985758463, 'model.decoder.layers.21.self_attn_layer_norm.bias': 1.4953891436258953, 'model.decoder.layers.21.fc1.weight': 182.3254648844401, 'model.decoder.layers.21.fc1.bias': 0.4217895766099294, 'model.decoder.layers.21.fc2.weight': 48.591383616129555, 'model.decoder.layers.21.fc2.bias': 1.6303943395614624, 'model.decoder.layers.21.final_layer_norm.weight': 0.1133418579896291, 'model.decoder.layers.21.final_layer_norm.bias': 
0.8550681173801422, 'model.decoder.layers.22.self_attn.k_proj.weight': 55.32455571492513, 'model.decoder.layers.22.self_attn.k_proj.bias': 5.932955673415563e-07, 'model.decoder.layers.22.self_attn.v_proj.weight': 260.07740783691406, 'model.decoder.layers.22.self_attn.v_proj.bias': 0.6635967095692953, 'model.decoder.layers.22.self_attn.q_proj.weight': 41.74598375956217, 'model.decoder.layers.22.self_attn.q_proj.bias': 0.7733224692444006, 'model.decoder.layers.22.self_attn.out_proj.weight': 7.186656634012858, 'model.decoder.layers.22.self_attn.out_proj.bias': 2.8045431772867837, 'model.decoder.layers.22.self_attn_layer_norm.weight': 0.10426807403564453, 'model.decoder.layers.22.self_attn_layer_norm.bias': 1.7604074478149414, 'model.decoder.layers.22.fc1.weight': 323.54155985514325, 'model.decoder.layers.22.fc1.bias': 3.094419479370117, 'model.decoder.layers.22.fc2.weight': 125.38926951090495, 'model.decoder.layers.22.fc2.bias': 1.9736011028289795, 'model.decoder.layers.22.final_layer_norm.weight': 3.3347854614257812, 'model.decoder.layers.22.final_layer_norm.bias': 1.4577517807483673, 'model.decoder.layers.23.self_attn.k_proj.weight': 2116.7174072265625, 'model.decoder.layers.23.self_attn.k_proj.bias': 0.0005777936894446611, 'model.decoder.layers.23.self_attn.v_proj.weight': 259.937255859375, 'model.decoder.layers.23.self_attn.v_proj.bias': 2.886035362879435, 'model.decoder.layers.23.self_attn.q_proj.weight': 23.766716639200848, 'model.decoder.layers.23.self_attn.q_proj.bias': 0.33590322732925415, 'model.decoder.layers.23.self_attn.out_proj.weight': 15.94110107421875, 'model.decoder.layers.23.self_attn.out_proj.bias': 2.1962711811065674, 'model.decoder.layers.23.self_attn_layer_norm.weight': 116.557679494222, 'model.decoder.layers.23.self_attn_layer_norm.bias': 1.6408641537030537, 'model.decoder.layers.23.fc1.weight': 361.1596272786458, 'model.decoder.layers.23.fc1.bias': 0.11565979321797688, 'model.decoder.layers.23.fc2.weight': 151.1081339518229, 
'model.decoder.layers.23.fc2.bias': 2.4938165744145713, 'model.decoder.layers.23.final_layer_norm.weight': 4.278749386469523, 'model.decoder.layers.23.final_layer_norm.bias': 2.4786187410354614}\n",
    "# Aggregate the per-parameter Hessian traces into one sensitivity score per decoder layer.\n",
    "# Keys look like 'model.decoder.layers.<idx>.<param>', so dotted field 3 is the layer index.\n",
    "layer_traces = [\n",
    "    (int(name.split('.')[3]), trace)\n",
    "    for name, trace in hessian_trace.items()\n",
    "    if name.startswith(\"model.decoder.layers\")\n",
    "]\n",
    "# Size the result from the data instead of hard-coding 24 layers, so a deeper model\n",
    "# cannot raise IndexError and a shallower one is not padded with spurious zeros.\n",
    "num_layers = max((layer for layer, _ in layer_traces), default=-1) + 1\n",
    "sensitivity = [0] * num_layers\n",
    "# Accumulate in dict (insertion) order so the floating-point sums stay reproducible.\n",
    "for layer, trace in layer_traces:\n",
    "    sensitivity[layer] += trace\n",
    "print(sensitivity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cc459a5e-8def-40c9-a534-c3044ac8e7a0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1513.0305585898777, 1270.8919252737633, 975.4037257357148, 1546.236512639175, 1570.9296292897138, 1710.5916946639418, 2579.875780077182, 2876.4986672811033, 3246.852629712942, 4618.321481762417, 4541.876624815845, 3857.7017805140526, 3339.151764879946, 3211.957446126425, 2282.788240383803, 2428.0685215430767, 1958.729125737127, 1326.0629923152626, 1214.438442518927, 1039.799794204453, 823.1728501649245, 710.086921476227, 738.2685832145203, 1999.661469769194]\n"
     ]
    }
   ],
   "source": [
    "### 4 samples\n",
    "hessian_trace = {'model.decoder.embed_tokens.weight': 1342.1681722005208, 'model.decoder.embed_positions.weight': 24.1683931350708, 'model.decoder.final_layer_norm.weight': 5.933843771616618, 'model.decoder.final_layer_norm.bias': 1.2799332936604817, 'model.decoder.layers.0.self_attn.k_proj.weight': 13.01789657274882, 'model.decoder.layers.0.self_attn.k_proj.bias': 1.5028540426934947e-07, 'model.decoder.layers.0.self_attn.v_proj.weight': 304.1651204427083, 'model.decoder.layers.0.self_attn.v_proj.bias': 14.386877536773682, 'model.decoder.layers.0.self_attn.q_proj.weight': 10.8556489944458, 'model.decoder.layers.0.self_attn.q_proj.bias': 0.25254246840874356, 'model.decoder.layers.0.self_attn.out_proj.weight': 112.90345764160156, 'model.decoder.layers.0.self_attn.out_proj.bias': 30.885592142740887, 'model.decoder.layers.0.self_attn_layer_norm.weight': 2.4177544911702475, 'model.decoder.layers.0.self_attn_layer_norm.bias': 1.0536502202351887, 'model.decoder.layers.0.fc1.weight': 821.9387613932291, 'model.decoder.layers.0.fc1.bias': 3.001427173614502, 'model.decoder.layers.0.fc2.weight': 188.03184763590494, 'model.decoder.layers.0.fc2.bias': 7.955014069875081, 'model.decoder.layers.0.final_layer_norm.weight': 0.9421019156773885, 'model.decoder.layers.0.final_layer_norm.bias': 1.2228657404581706, 'model.decoder.layers.1.self_attn.k_proj.weight': 8.205692927042643, 'model.decoder.layers.1.self_attn.k_proj.bias': 2.6665850327844964e-07, 'model.decoder.layers.1.self_attn.v_proj.weight': 822.5408528645834, 'model.decoder.layers.1.self_attn.v_proj.bias': 1.8569959004720051, 'model.decoder.layers.1.self_attn.q_proj.weight': 7.195351918538411, 'model.decoder.layers.1.self_attn.q_proj.bias': 0.2418122192223867, 'model.decoder.layers.1.self_attn.out_proj.weight': 24.672770182291668, 'model.decoder.layers.1.self_attn.out_proj.bias': 13.183728138605753, 'model.decoder.layers.1.self_attn_layer_norm.weight': 0.6793864965438843, 
'model.decoder.layers.1.self_attn_layer_norm.bias': 3.7234507401784263, 'model.decoder.layers.1.fc1.weight': 282.2998758951823, 'model.decoder.layers.1.fc1.bias': 0.4193686644236247, 'model.decoder.layers.1.fc2.weight': 75.47132873535156, 'model.decoder.layers.1.fc2.bias': 29.394294102986652, 'model.decoder.layers.1.final_layer_norm.weight': 0.8705424865086874, 'model.decoder.layers.1.final_layer_norm.bias': 0.1364737351735433, 'model.decoder.layers.2.self_attn.k_proj.weight': 12.485458691914877, 'model.decoder.layers.2.self_attn.k_proj.bias': 7.189398350722816e-08, 'model.decoder.layers.2.self_attn.v_proj.weight': 749.4091593424479, 'model.decoder.layers.2.self_attn.v_proj.bias': 7.210757732391357, 'model.decoder.layers.2.self_attn.q_proj.weight': 10.92270278930664, 'model.decoder.layers.2.self_attn.q_proj.bias': 0.6713455518086752, 'model.decoder.layers.2.self_attn.out_proj.weight': 41.51854960123698, 'model.decoder.layers.2.self_attn.out_proj.bias': 1.3576285044352214, 'model.decoder.layers.2.self_attn_layer_norm.weight': 3.345496336619059, 'model.decoder.layers.2.self_attn_layer_norm.bias': 0.8379338979721069, 'model.decoder.layers.2.fc1.weight': 108.57311979929607, 'model.decoder.layers.2.fc1.bias': 0.923136313756307, 'model.decoder.layers.2.fc2.weight': 25.575768788655598, 'model.decoder.layers.2.fc2.bias': 0.32785797119140625, 'model.decoder.layers.2.final_layer_norm.weight': 6.692549546559651, 'model.decoder.layers.2.final_layer_norm.bias': 5.552260796229045, 'model.decoder.layers.3.self_attn.k_proj.weight': 22.768256505330402, 'model.decoder.layers.3.self_attn.k_proj.bias': 1.5700887464239108e-07, 'model.decoder.layers.3.self_attn.v_proj.weight': 1145.0419921875, 'model.decoder.layers.3.self_attn.v_proj.bias': 6.66569463411967, 'model.decoder.layers.3.self_attn.q_proj.weight': 19.294229825337727, 'model.decoder.layers.3.self_attn.q_proj.bias': 0.315186083316803, 'model.decoder.layers.3.self_attn.out_proj.weight': 54.681200663248696, 
'model.decoder.layers.3.self_attn.out_proj.bias': 0.49727551142374676, 'model.decoder.layers.3.self_attn_layer_norm.weight': 1.138353665669759, 'model.decoder.layers.3.self_attn_layer_norm.bias': 3.2116569677988687, 'model.decoder.layers.3.fc1.weight': 279.9979070027669, 'model.decoder.layers.3.fc1.bias': 1.323710044225057, 'model.decoder.layers.3.fc2.weight': 2.0273634592692056, 'model.decoder.layers.3.fc2.bias': 6.608207066853841, 'model.decoder.layers.3.final_layer_norm.weight': 2.182908614476522, 'model.decoder.layers.3.final_layer_norm.bias': 0.48257025082906085, 'model.decoder.layers.4.self_attn.k_proj.weight': 16.235650698343914, 'model.decoder.layers.4.self_attn.k_proj.bias': 6.493005836697799e-07, 'model.decoder.layers.4.self_attn.v_proj.weight': 949.1672159830729, 'model.decoder.layers.4.self_attn.v_proj.bias': 7.903181870778401, 'model.decoder.layers.4.self_attn.q_proj.weight': 25.188310305277508, 'model.decoder.layers.4.self_attn.q_proj.bias': 0.7994148234526316, 'model.decoder.layers.4.self_attn.out_proj.weight': 69.90832201639812, 'model.decoder.layers.4.self_attn.out_proj.bias': 29.1803191502889, 'model.decoder.layers.4.self_attn_layer_norm.weight': 3.0491552352905273, 'model.decoder.layers.4.self_attn_layer_norm.bias': 0.7611263593037924, 'model.decoder.layers.4.fc1.weight': 405.6389617919922, 'model.decoder.layers.4.fc1.bias': 2.56512713432312, 'model.decoder.layers.4.fc2.weight': 54.75786590576172, 'model.decoder.layers.4.fc2.bias': 3.882349212964376, 'model.decoder.layers.4.final_layer_norm.weight': 0.7533944050470988, 'model.decoder.layers.4.final_layer_norm.bias': 1.1392337481180828, 'model.decoder.layers.5.self_attn.k_proj.weight': 6.377862850824992, 'model.decoder.layers.5.self_attn.k_proj.bias': 3.5860136904375395e-07, 'model.decoder.layers.5.self_attn.v_proj.weight': 1242.8815002441406, 'model.decoder.layers.5.self_attn.v_proj.bias': 8.07829213142395, 'model.decoder.layers.5.self_attn.q_proj.weight': 19.28999423980713, 
'model.decoder.layers.5.self_attn.q_proj.bias': 0.521608034769694, 'model.decoder.layers.5.self_attn.out_proj.weight': 34.956345876057945, 'model.decoder.layers.5.self_attn.out_proj.bias': 6.511233647664388, 'model.decoder.layers.5.self_attn_layer_norm.weight': 1.7302594582239788, 'model.decoder.layers.5.self_attn_layer_norm.bias': 3.8111457427342734, 'model.decoder.layers.5.fc1.weight': 328.6005554199219, 'model.decoder.layers.5.fc1.bias': 1.3406884968280792, 'model.decoder.layers.5.fc2.weight': 36.76817957560221, 'model.decoder.layers.5.fc2.bias': 18.055283069610596, 'model.decoder.layers.5.final_layer_norm.weight': 0.3436125914255778, 'model.decoder.layers.5.final_layer_norm.bias': 1.325132926305135, 'model.decoder.layers.6.self_attn.k_proj.weight': 10.794076919555664, 'model.decoder.layers.6.self_attn.k_proj.bias': 2.3981236078422324e-07, 'model.decoder.layers.6.self_attn.v_proj.weight': 1967.4698486328125, 'model.decoder.layers.6.self_attn.v_proj.bias': 2.7173641522725425, 'model.decoder.layers.6.self_attn.q_proj.weight': 31.59640057881673, 'model.decoder.layers.6.self_attn.q_proj.bias': 0.00972248117129008, 'model.decoder.layers.6.self_attn.out_proj.weight': 184.19303385416666, 'model.decoder.layers.6.self_attn.out_proj.bias': 29.44133122762044, 'model.decoder.layers.6.self_attn_layer_norm.weight': 1.0593587557474773, 'model.decoder.layers.6.self_attn_layer_norm.bias': 2.42702849706014, 'model.decoder.layers.6.fc1.weight': 229.94681803385416, 'model.decoder.layers.6.fc1.bias': 0.06882564226786296, 'model.decoder.layers.6.fc2.weight': 113.79475402832031, 'model.decoder.layers.6.fc2.bias': 5.546576182047526, 'model.decoder.layers.6.final_layer_norm.weight': 0.09554886817932129, 'model.decoder.layers.6.final_layer_norm.bias': 0.7150919834772745, 'model.decoder.layers.7.self_attn.k_proj.weight': 13.400712331136068, 'model.decoder.layers.7.self_attn.k_proj.bias': 1.701039119931617e-07, 'model.decoder.layers.7.self_attn.v_proj.weight': 2298.189697265625, 
'model.decoder.layers.7.self_attn.v_proj.bias': 1.2284919420878093, 'model.decoder.layers.7.self_attn.q_proj.weight': 10.520524978637695, 'model.decoder.layers.7.self_attn.q_proj.bias': 0.22085949778556824, 'model.decoder.layers.7.self_attn.out_proj.weight': 53.21618398030599, 'model.decoder.layers.7.self_attn.out_proj.bias': 20.785338401794434, 'model.decoder.layers.7.self_attn_layer_norm.weight': 2.9862199624379477, 'model.decoder.layers.7.self_attn_layer_norm.bias': 7.838295618693034, 'model.decoder.layers.7.fc1.weight': 368.243169148763, 'model.decoder.layers.7.fc1.bias': 0.4342960516611735, 'model.decoder.layers.7.fc2.weight': 84.12944602966309, 'model.decoder.layers.7.fc2.bias': 11.731993675231934, 'model.decoder.layers.7.final_layer_norm.weight': 1.369914670785268, 'model.decoder.layers.7.final_layer_norm.bias': 2.203523556391398, 'model.decoder.layers.8.self_attn.k_proj.weight': 6.932219505310059, 'model.decoder.layers.8.self_attn.k_proj.bias': 1.30854933407439e-07, 'model.decoder.layers.8.self_attn.v_proj.weight': 2706.6857096354165, 'model.decoder.layers.8.self_attn.v_proj.bias': 13.201882044474283, 'model.decoder.layers.8.self_attn.q_proj.weight': 7.6140516599019366, 'model.decoder.layers.8.self_attn.q_proj.bias': 0.7999947865804037, 'model.decoder.layers.8.self_attn.out_proj.weight': 53.63329315185547, 'model.decoder.layers.8.self_attn.out_proj.bias': 15.787126859029135, 'model.decoder.layers.8.self_attn_layer_norm.weight': 1.3414497375488281, 'model.decoder.layers.8.self_attn_layer_norm.bias': 5.609317461649577, 'model.decoder.layers.8.fc1.weight': 254.87800089518228, 'model.decoder.layers.8.fc1.bias': 3.0886189937591553, 'model.decoder.layers.8.fc2.weight': 164.77111434936523, 'model.decoder.layers.8.fc2.bias': 8.514549255371094, 'model.decoder.layers.8.final_layer_norm.weight': 1.83138902982076, 'model.decoder.layers.8.final_layer_norm.bias': 2.163912216822306, 'model.decoder.layers.9.self_attn.k_proj.weight': 30.084943135579426, 
'model.decoder.layers.9.self_attn.k_proj.bias': 1.272437657462433e-07, 'model.decoder.layers.9.self_attn.v_proj.weight': 3811.4280598958335, 'model.decoder.layers.9.self_attn.v_proj.bias': 1.2687160968780518, 'model.decoder.layers.9.self_attn.q_proj.weight': 33.55126953125, 'model.decoder.layers.9.self_attn.q_proj.bias': 0.6936888297398885, 'model.decoder.layers.9.self_attn.out_proj.weight': 71.98683802286784, 'model.decoder.layers.9.self_attn.out_proj.bias': 40.606370290120445, 'model.decoder.layers.9.self_attn_layer_norm.weight': 4.061672687530518, 'model.decoder.layers.9.self_attn_layer_norm.bias': 4.892525831858317, 'model.decoder.layers.9.fc1.weight': 489.4402669270833, 'model.decoder.layers.9.fc1.bias': 4.20834685365359, 'model.decoder.layers.9.fc2.weight': 108.20776875813802, 'model.decoder.layers.9.fc2.bias': 14.503772735595703, 'model.decoder.layers.9.final_layer_norm.weight': 0.16778302192687988, 'model.decoder.layers.9.final_layer_norm.bias': 3.2194590171178183, 'model.decoder.layers.10.self_attn.k_proj.weight': 35.980258305867515, 'model.decoder.layers.10.self_attn.k_proj.bias': 3.3098763196903747e-07, 'model.decoder.layers.10.self_attn.v_proj.weight': 3778.4922688802085, 'model.decoder.layers.10.self_attn.v_proj.bias': 1.888675371805827, 'model.decoder.layers.10.self_attn.q_proj.weight': 55.78825982411703, 'model.decoder.layers.10.self_attn.q_proj.bias': 0.016521771748860676, 'model.decoder.layers.10.self_attn.out_proj.weight': 103.24035898844402, 'model.decoder.layers.10.self_attn.out_proj.bias': 3.2191861470540366, 'model.decoder.layers.10.self_attn_layer_norm.weight': 0.1414454778035482, 'model.decoder.layers.10.self_attn_layer_norm.bias': 1.8941406806310017, 'model.decoder.layers.10.fc1.weight': 518.4652811686198, 'model.decoder.layers.10.fc1.bias': 0.8037732442220052, 'model.decoder.layers.10.fc2.weight': 37.37336349487305, 'model.decoder.layers.10.fc2.bias': 0.7380590438842773, 'model.decoder.layers.10.final_layer_norm.weight': 2.578613022963206, 
'model.decoder.layers.10.final_layer_norm.bias': 1.256419062614441, 'model.decoder.layers.11.self_attn.k_proj.weight': 6.835479736328125, 'model.decoder.layers.11.self_attn.k_proj.bias': 2.147071427316405e-07, 'model.decoder.layers.11.self_attn.v_proj.weight': 2937.44677734375, 'model.decoder.layers.11.self_attn.v_proj.bias': 17.048807779947918, 'model.decoder.layers.11.self_attn.q_proj.weight': 110.27231089274089, 'model.decoder.layers.11.self_attn.q_proj.bias': 0.9680182139078776, 'model.decoder.layers.11.self_attn.out_proj.weight': 82.83829752604167, 'model.decoder.layers.11.self_attn.out_proj.bias': 24.171387513478596, 'model.decoder.layers.11.self_attn_layer_norm.weight': 0.7926605343818665, 'model.decoder.layers.11.self_attn_layer_norm.bias': 5.214005311330159, 'model.decoder.layers.11.fc1.weight': 492.5968831380208, 'model.decoder.layers.11.fc1.bias': 5.350625991821289, 'model.decoder.layers.11.fc2.weight': 167.3804677327474, 'model.decoder.layers.11.fc2.bias': 0.03993860880533854, 'model.decoder.layers.11.final_layer_norm.weight': 1.8592697779337566, 'model.decoder.layers.11.final_layer_norm.bias': 4.886850198109944, 'model.decoder.layers.12.self_attn.k_proj.weight': 43.24777857462565, 'model.decoder.layers.12.self_attn.k_proj.bias': 3.380811222086777e-07, 'model.decoder.layers.12.self_attn.v_proj.weight': 2416.315877278646, 'model.decoder.layers.12.self_attn.v_proj.bias': 7.949548403422038, 'model.decoder.layers.12.self_attn.q_proj.weight': 18.589203516642254, 'model.decoder.layers.12.self_attn.q_proj.bias': 0.3109743495782216, 'model.decoder.layers.12.self_attn.out_proj.weight': 135.22427876790366, 'model.decoder.layers.12.self_attn.out_proj.bias': 1.0633862018585205, 'model.decoder.layers.12.self_attn_layer_norm.weight': 0.6929528713226318, 'model.decoder.layers.12.self_attn_layer_norm.bias': 6.564732472101848, 'model.decoder.layers.12.fc1.weight': 510.18975830078125, 'model.decoder.layers.12.fc1.bias': 1.5228395859400432, 
'model.decoder.layers.12.fc2.weight': 183.53431701660156, 'model.decoder.layers.12.fc2.bias': 11.815310796101889, 'model.decoder.layers.12.final_layer_norm.weight': 0.2734771966934204, 'model.decoder.layers.12.final_layer_norm.bias': 1.8573292096455891, 'model.decoder.layers.13.self_attn.k_proj.weight': 44.70322275161743, 'model.decoder.layers.13.self_attn.k_proj.bias': 9.76356204773765e-08, 'model.decoder.layers.13.self_attn.v_proj.weight': 1943.0528971354167, 'model.decoder.layers.13.self_attn.v_proj.bias': 0.27065277099609375, 'model.decoder.layers.13.self_attn.q_proj.weight': 47.39048957824707, 'model.decoder.layers.13.self_attn.q_proj.bias': 1.2670722007751465, 'model.decoder.layers.13.self_attn.out_proj.weight': 247.59878540039062, 'model.decoder.layers.13.self_attn.out_proj.bias': 0.6574381192525228, 'model.decoder.layers.13.self_attn_layer_norm.weight': 0.49679656823476154, 'model.decoder.layers.13.self_attn_layer_norm.bias': 9.2999587059021, 'model.decoder.layers.13.fc1.weight': 618.9151204427084, 'model.decoder.layers.13.fc1.bias': 2.018078883488973, 'model.decoder.layers.13.fc2.weight': 285.63037109375, 'model.decoder.layers.13.fc2.bias': 7.971388816833496, 'model.decoder.layers.13.final_layer_norm.weight': 0.2821807265281677, 'model.decoder.layers.13.final_layer_norm.bias': 2.4029928346474967, 'model.decoder.layers.14.self_attn.k_proj.weight': 99.39690907796223, 'model.decoder.layers.14.self_attn.k_proj.bias': 3.484277234141094e-07, 'model.decoder.layers.14.self_attn.v_proj.weight': 1353.603271484375, 'model.decoder.layers.14.self_attn.v_proj.bias': 6.766694068908691, 'model.decoder.layers.14.self_attn.q_proj.weight': 44.544328689575195, 'model.decoder.layers.14.self_attn.q_proj.bias': 1.0361899733543396, 'model.decoder.layers.14.self_attn.out_proj.weight': 120.91687774658203, 'model.decoder.layers.14.self_attn.out_proj.bias': 7.758475422859192, 'model.decoder.layers.14.self_attn_layer_norm.weight': 1.9277111291885376, 
'model.decoder.layers.14.self_attn_layer_norm.bias': 1.3719262679417927, 'model.decoder.layers.14.fc1.weight': 422.3934733072917, 'model.decoder.layers.14.fc1.bias': 1.4034167130788167, 'model.decoder.layers.14.fc2.weight': 211.75309244791666, 'model.decoder.layers.14.fc2.bias': 8.140815337498983, 'model.decoder.layers.14.final_layer_norm.weight': 1.3108351429303486, 'model.decoder.layers.14.final_layer_norm.bias': 0.4642232259114583, 'model.decoder.layers.15.self_attn.k_proj.weight': 88.71480814615886, 'model.decoder.layers.15.self_attn.k_proj.bias': 1.5271753757891324e-07, 'model.decoder.layers.15.self_attn.v_proj.weight': 1643.029052734375, 'model.decoder.layers.15.self_attn.v_proj.bias': 3.377858797709147, 'model.decoder.layers.15.self_attn.q_proj.weight': 26.42046356201172, 'model.decoder.layers.15.self_attn.q_proj.bias': 0.1289642651875814, 'model.decoder.layers.15.self_attn.out_proj.weight': 103.26455688476562, 'model.decoder.layers.15.self_attn.out_proj.bias': 4.1376891533533735, 'model.decoder.layers.15.self_attn_layer_norm.weight': 3.3278427918752036, 'model.decoder.layers.15.self_attn_layer_norm.bias': 0.5411892731984457, 'model.decoder.layers.15.fc1.weight': 352.7498728434245, 'model.decoder.layers.15.fc1.bias': 2.4237073560555777, 'model.decoder.layers.15.fc2.weight': 197.15579986572266, 'model.decoder.layers.15.fc2.bias': 1.8324532508850098, 'model.decoder.layers.15.final_layer_norm.weight': 0.5700578888257345, 'model.decoder.layers.15.final_layer_norm.bias': 0.394204576810201, 'model.decoder.layers.16.self_attn.k_proj.weight': 41.223559061686196, 'model.decoder.layers.16.self_attn.k_proj.bias': 1.8773107512970455e-07, 'model.decoder.layers.16.self_attn.v_proj.weight': 1117.0879516601562, 'model.decoder.layers.16.self_attn.v_proj.bias': 3.1535398960113525, 'model.decoder.layers.16.self_attn.q_proj.weight': 76.22063604990642, 'model.decoder.layers.16.self_attn.q_proj.bias': 0.6183621287345886, 'model.decoder.layers.16.self_attn.out_proj.weight': 
102.32844193776448, 'model.decoder.layers.16.self_attn.out_proj.bias': 2.774540901184082, 'model.decoder.layers.16.self_attn_layer_norm.weight': 3.262865980466207, 'model.decoder.layers.16.self_attn_layer_norm.bias': 3.772494226694107, 'model.decoder.layers.16.fc1.weight': 341.94688924153644, 'model.decoder.layers.16.fc1.bias': 1.1457961002985637, 'model.decoder.layers.16.fc2.weight': 256.08323669433594, 'model.decoder.layers.16.fc2.bias': 7.97939395904541, 'model.decoder.layers.16.final_layer_norm.weight': 0.828700433174769, 'model.decoder.layers.16.final_layer_norm.bias': 0.30271727840105694, 'model.decoder.layers.17.self_attn.k_proj.weight': 7.235998789469401, 'model.decoder.layers.17.self_attn.k_proj.bias': 1.6467777186335297e-07, 'model.decoder.layers.17.self_attn.v_proj.weight': 653.8479207356771, 'model.decoder.layers.17.self_attn.v_proj.bias': 1.431160608927409, 'model.decoder.layers.17.self_attn.q_proj.weight': 41.294720808664955, 'model.decoder.layers.17.self_attn.q_proj.bias': 0.36227652927239734, 'model.decoder.layers.17.self_attn.out_proj.weight': 138.76739501953125, 'model.decoder.layers.17.self_attn.out_proj.bias': 3.0937984188397727, 'model.decoder.layers.17.self_attn_layer_norm.weight': 0.5298941532770792, 'model.decoder.layers.17.self_attn_layer_norm.bias': 0.5543633302052816, 'model.decoder.layers.17.fc1.weight': 294.45876057942706, 'model.decoder.layers.17.fc1.bias': 0.5229017933209738, 'model.decoder.layers.17.fc2.weight': 182.17564900716147, 'model.decoder.layers.17.fc2.bias': 1.306451718012492, 'model.decoder.layers.17.final_layer_norm.weight': 0.06069556872049967, 'model.decoder.layers.17.final_layer_norm.bias': 0.4210050900777181, 'model.decoder.layers.18.self_attn.k_proj.weight': 27.64689064025879, 'model.decoder.layers.18.self_attn.k_proj.bias': 6.021808000393018e-08, 'model.decoder.layers.18.self_attn.v_proj.weight': 654.0339762369791, 'model.decoder.layers.18.self_attn.v_proj.bias': 0.7127918601036072, 
'model.decoder.layers.18.self_attn.q_proj.weight': 40.171119689941406, 'model.decoder.layers.18.self_attn.q_proj.bias': 0.05616576472918192, 'model.decoder.layers.18.self_attn.out_proj.weight': 98.11953481038411, 'model.decoder.layers.18.self_attn.out_proj.bias': 3.2646737893422446, 'model.decoder.layers.18.self_attn_layer_norm.weight': 1.225504477818807, 'model.decoder.layers.18.self_attn_layer_norm.bias': 0.88628617922465, 'model.decoder.layers.18.fc1.weight': 257.0570119222005, 'model.decoder.layers.18.fc1.bias': 0.1573167641957601, 'model.decoder.layers.18.fc2.weight': 125.86241149902344, 'model.decoder.layers.18.fc2.bias': 2.731776714324951, 'model.decoder.layers.18.final_layer_norm.weight': 0.7316379348436991, 'model.decoder.layers.18.final_layer_norm.bias': 1.7813441753387451, 'model.decoder.layers.19.self_attn.k_proj.weight': 1.417704741160075, 'model.decoder.layers.19.self_attn.k_proj.bias': 9.18101553300706e-08, 'model.decoder.layers.19.self_attn.v_proj.weight': 643.3982747395834, 'model.decoder.layers.19.self_attn.v_proj.bias': 2.0278032620747886, 'model.decoder.layers.19.self_attn.q_proj.weight': 38.04727300008138, 'model.decoder.layers.19.self_attn.q_proj.bias': 0.4496624718109767, 'model.decoder.layers.19.self_attn.out_proj.weight': 56.889912923177086, 'model.decoder.layers.19.self_attn.out_proj.bias': 1.1237153212229412, 'model.decoder.layers.19.self_attn_layer_norm.weight': 0.4070456027984619, 'model.decoder.layers.19.self_attn_layer_norm.bias': 0.6619784434636434, 'model.decoder.layers.19.fc1.weight': 193.38641866048178, 'model.decoder.layers.19.fc1.bias': 0.19038530190785727, 'model.decoder.layers.19.fc2.weight': 100.04376729329427, 'model.decoder.layers.19.fc2.bias': 1.519480029741923, 'model.decoder.layers.19.final_layer_norm.weight': 0.23273561398188272, 'model.decoder.layers.19.final_layer_norm.bias': 0.003636707862218221, 'model.decoder.layers.20.self_attn.k_proj.weight': 17.174970467885334, 'model.decoder.layers.20.self_attn.k_proj.bias': 
1.4221110215354807e-07, 'model.decoder.layers.20.self_attn.v_proj.weight': 386.20668538411456, 'model.decoder.layers.20.self_attn.v_proj.bias': 1.0839078426361084, 'model.decoder.layers.20.self_attn.q_proj.weight': 33.20971123377482, 'model.decoder.layers.20.self_attn.q_proj.bias': 0.3634732762972514, 'model.decoder.layers.20.self_attn.out_proj.weight': 42.36864344278971, 'model.decoder.layers.20.self_attn.out_proj.bias': 0.04065712292989095, 'model.decoder.layers.20.self_attn_layer_norm.weight': 0.84199059009552, 'model.decoder.layers.20.self_attn_layer_norm.bias': 1.205614725748698, 'model.decoder.layers.20.fc1.weight': 224.06273396809897, 'model.decoder.layers.20.fc1.bias': 0.2046163578828176, 'model.decoder.layers.20.fc2.weight': 114.27700805664062, 'model.decoder.layers.20.fc2.bias': 1.6952022910118103, 'model.decoder.layers.20.final_layer_norm.weight': 0.0711510181427002, 'model.decoder.layers.20.final_layer_norm.bias': 0.3664842446645101, 'model.decoder.layers.21.self_attn.k_proj.weight': 35.402889251708984, 'model.decoder.layers.21.self_attn.k_proj.bias': 7.242168938622247e-08, 'model.decoder.layers.21.self_attn.v_proj.weight': 350.38463338216144, 'model.decoder.layers.21.self_attn.v_proj.bias': 3.2277145385742188, 'model.decoder.layers.21.self_attn.q_proj.weight': 11.360143979390463, 'model.decoder.layers.21.self_attn.q_proj.bias': 0.05229686697324117, 'model.decoder.layers.21.self_attn.out_proj.weight': 16.12295405069987, 'model.decoder.layers.21.self_attn.out_proj.bias': 0.885185440381368, 'model.decoder.layers.21.self_attn_layer_norm.weight': 0.6398048798243204, 'model.decoder.layers.21.self_attn_layer_norm.bias': 0.12247457106908162, 'model.decoder.layers.21.fc1.weight': 190.72351582845053, 'model.decoder.layers.21.fc1.bias': 0.46999767422676086, 'model.decoder.layers.21.fc2.weight': 99.63205464680989, 'model.decoder.layers.21.fc2.bias': 0.24548552433649698, 'model.decoder.layers.21.final_layer_norm.weight': 0.2244312266508738, 
'model.decoder.layers.21.final_layer_norm.bias': 0.5933395425478617, 'model.decoder.layers.22.self_attn.k_proj.weight': 25.64413897196452, 'model.decoder.layers.22.self_attn.k_proj.bias': 4.1101259284914704e-07, 'model.decoder.layers.22.self_attn.v_proj.weight': 361.4449055989583, 'model.decoder.layers.22.self_attn.v_proj.bias': 1.1403959890206654, 'model.decoder.layers.22.self_attn.q_proj.weight': 33.05392837524414, 'model.decoder.layers.22.self_attn.q_proj.bias': 0.2844744051496188, 'model.decoder.layers.22.self_attn.out_proj.weight': 1.5946226119995117, 'model.decoder.layers.22.self_attn.out_proj.bias': 0.2257000058889389, 'model.decoder.layers.22.self_attn_layer_norm.weight': 0.9469030300776163, 'model.decoder.layers.22.self_attn_layer_norm.bias': 1.9069212277730305, 'model.decoder.layers.22.fc1.weight': 210.63877868652344, 'model.decoder.layers.22.fc1.bias': 0.8388123710950216, 'model.decoder.layers.22.fc2.weight': 99.31351470947266, 'model.decoder.layers.22.fc2.bias': 0.6318180710077286, 'model.decoder.layers.22.final_layer_norm.weight': 0.5377614498138428, 'model.decoder.layers.22.final_layer_norm.bias': 0.0659072995185852, 'model.decoder.layers.23.self_attn.k_proj.weight': 1289.1272684733074, 'model.decoder.layers.23.self_attn.k_proj.bias': 2.247886247156809e-05, 'model.decoder.layers.23.self_attn.v_proj.weight': 171.50445048014322, 'model.decoder.layers.23.self_attn.v_proj.bias': 0.48066914081573486, 'model.decoder.layers.23.self_attn.q_proj.weight': 9.926158428192139, 'model.decoder.layers.23.self_attn.q_proj.bias': 0.3147663362324238, 'model.decoder.layers.23.self_attn.out_proj.weight': 11.503536860148111, 'model.decoder.layers.23.self_attn.out_proj.bias': 0.41575293242931366, 'model.decoder.layers.23.self_attn_layer_norm.weight': 28.82709248860677, 'model.decoder.layers.23.self_attn_layer_norm.bias': 0.9931980967521667, 'model.decoder.layers.23.fc1.weight': 408.13881429036456, 'model.decoder.layers.23.fc1.bias': 0.8616287410259247, 
'model.decoder.layers.23.fc2.weight': 75.49375279744466, 'model.decoder.layers.23.fc2.bias': 0.2855888406435649, 'model.decoder.layers.23.final_layer_norm.weight': 0.3316577474276225, 'model.decoder.layers.23.final_layer_norm.bias': 1.4571116367975872}\n",
    "sensitivity = [0]*24\n",
    "for name, trace in hessian_trace.items():\n",
    "   if name.startswith(\"model.decoder.layers\"):\n",
    "       layer = int(name.split('.')[3])\n",
    "       sensitivity[layer] += trace\n",
    "print(sensitivity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "4211f519-4d6e-4b54-a5de-304d4d9eefe0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1.818887085391907e-07, 2.2584390535485e-07, 5.135842684467207e-07, 6.586087692994624e-07, 7.825683496776037e-07, 1.5280495517799864e-06, 1.5418418115586974e-06, 1.5819550753803924e-06, 1.7441607269574888e-06, 1.934450892804307e-06, 2.20234619519033e-06, 2.2089188860263675e-06, 2.220020178356208e-06, 2.699195647437591e-06, 2.875465725082904e-06, 4.40867825091118e-06, 4.506875484366901e-06, 5.11652751811198e-06, 5.267726010060869e-06, 5.6471722018613946e-06, 5.873205282114213e-06, 6.018878593749832e-06, 6.046268026693724e-06, 6.125213985797018e-06, 6.13882457400905e-06, 6.485556696134154e-06, 6.528708581754472e-06, 6.837515229562996e-06, 6.936030331416987e-06, 7.157395884860307e-06, 7.1948597906157374e-06, 7.398500201816205e-06, 8.037301995500457e-06, 8.06709340395173e-06, 8.155950126820244e-06, 8.200007869163528e-06, 8.243401680374518e-06, 9.013900125864893e-06, 9.07519097381737e-06, 9.089013474294916e-06, 9.453647180635016e-06, 9.591492016625125e-06, 9.811848940444179e-06, 9.975660759664606e-06, 1.011752010526834e-05, 1.0467017091286834e-05, 1.0524420758883934e-05, 1.0651410775608383e-05, 1.06591141957324e-05, 1.0935635145870037e-05, 1.151437663793331e-05, 1.1541615094756708e-05, 1.1630811059148982e-05, 1.2554923159768805e-05, 1.265824175789021e-05, 1.299438372370787e-05, 1.307657021243358e-05, 1.312786935159238e-05, 1.3824549569108058e-05, 1.4097247913014144e-05, 1.495193373557413e-05, 1.5558865925413556e-05, 1.653862273087725e-05, 1.66144673130475e-05, 1.6726547983125784e-05, 1.691165925876703e-05, 1.7018075595842674e-05, 1.7195801774505526e-05, 1.7371028661727905e-05, 1.7706663129501976e-05, 1.818699274735991e-05, 1.85953795153182e-05, 1.8655551684787497e-05, 1.9550689103198238e-05, 1.976549538085237e-05, 1.984708069358021e-05, 2.0197403500787914e-05, 2.0471183233894408e-05, 2.0651876184274442e-05, 2.0980100089218467e-05, 2.1154875867068768e-05, 2.1341147657949477e-05, 2.1556634237640537e-05, 2.1650248527294025e-05, 2.2840002202428877e-05, 
2.3366730601992458e-05, 2.3673514078836888e-05, 2.4387672965531237e-05, 2.5059925974346697e-05, 2.582679189799819e-05, 2.7213727662456222e-05, 2.8230115276528522e-05, 2.871520700864494e-05, 2.9348946554819122e-05, 3.1169627618510276e-05, 3.177683538524434e-05, 3.1982865039026365e-05, 3.265196210122667e-05, 3.445312177063897e-05, 3.4978158510057256e-05, 3.541999467415735e-05, 3.578695759642869e-05, 4.1227933252230287e-05, 4.131090099690482e-05, 4.259213892510161e-05, 4.4106283894507214e-05, 4.4570078898686916e-05, 4.527746204985306e-05, 4.553344115265645e-05, 4.61002164229285e-05, 4.645156877813861e-05, 4.728955536847934e-05, 4.7448615077883005e-05, 4.8154910473385826e-05, 5.2937732107238844e-05, 5.762240107287653e-05, 6.160035263746977e-05, 6.871994264656678e-05, 7.227572496049106e-05, 7.255269156303257e-05, 7.775293488521129e-05, 7.973126048455015e-05, 9.034632239490747e-05, 9.57314478000626e-05, 9.788910392671824e-05, 0.00010729266796261072, 0.0001076810949598439, 0.0001079331268556416, 0.00011045326391467825, 0.00011653665569610894, 0.00011780836939578876, 0.00012652759323827922, 0.00015770667232573032, 0.00021049194037914276, 0.0002107986219925806, 0.00021728238789364696, 0.0002496642409823835, 0.00025346881011500955, 0.0002736416645348072, 0.0002871536125894636, 0.0003623973170761019, 0.0003743779961951077, 0.00045877337106503546, 0.0004696718533523381, 0.0004895663587376475, 0.0005191607633605599, 0.000535756116732955, 0.0005745317903347313, 0.0005792105803266168, 0.000611298019066453, 0.0006141822086647153, 0.0006154445582069457, 0.0006180479540489614, 0.0006384402513504028, 0.0006391120259650052, 0.0007602623663842678, 0.0007680617272853851, 0.0007947176927700639, 0.0008154936949722469, 0.00083456642460078, 0.000966782565228641, 0.0011217626743018627, 0.0011564877349883318, 0.0013289607595652342, 0.0013620768440887332, 0.0014215430710464716, 0.001466980203986168, 0.001489672577008605, 0.0015217175241559744, 0.0016061868518590927, 0.0017888545989990234, 
0.0019106791587546468, 0.0021101871971040964, 0.0021222857758402824, 0.002189337508752942, 0.0024139797315001488, 0.0024552810937166214, 0.00260370085015893, 0.0027952108066529036, 0.0033557522110641003, 0.003637113608419895, 0.003654373809695244, 0.0038702962920069695, 0.004070748575031757, 0.004621668718755245, 0.004658102057874203, 0.005492689553648233, 0.005598359275609255, 0.006313554476946592, 0.00713425362482667, 0.007943467237055302, 0.05621916800737381] 192\n",
      "0.4450261780104712\n"
     ]
    }
   ],
   "source": [
    "### 8 samples\n",
    "import bisect\n",
    "#hessian_trace = {'model.decoder.embed_tokens.weight': 1.1185309799289643e-05, 'model.decoder.embed_positions.weight': 8.143285607419362e-06, 'model.decoder.final_layer_norm.weight': 0.002161037642508745, 'model.decoder.final_layer_norm.bias': 0.004603024572134018, 'model.decoder.layers.0.self_attn.k_proj.weight': 4.645156877813861e-05, 'model.decoder.layers.0.self_attn.k_proj.bias': 3.031572493039647e-10, 'model.decoder.layers.0.self_attn.v_proj.weight': 1.66144673130475e-05, 'model.decoder.layers.0.self_attn.v_proj.bias': 0.0019134297035634518, 'model.decoder.layers.0.self_attn.q_proj.weight': 2.0651876184274442e-05, 'model.decoder.layers.0.self_attn.q_proj.bias': 0.00010981145896948874, 'model.decoder.layers.0.self_attn.out_proj.weight': 7.775293488521129e-05, 'model.decoder.layers.0.self_attn.out_proj.bias': 0.00035202689468860626, 'model.decoder.layers.0.self_attn_layer_norm.weight': 0.0013620768440887332, 'model.decoder.layers.0.self_attn_layer_norm.bias': 0.007437482010573149, 'model.decoder.layers.0.fc1.weight': 0.00012652759323827922, 'model.decoder.layers.0.fc1.bias': 0.0024492822121828794, 'model.decoder.layers.0.fc2.weight': 2.7213727662456222e-05, 'model.decoder.layers.0.fc2.bias': 0.008369727060198784, 'model.decoder.layers.0.final_layer_norm.weight': 0.001489672577008605, 'model.decoder.layers.0.final_layer_norm.bias': 0.0009513160330243409, 'model.decoder.layers.1.self_attn.k_proj.weight': 6.936030331416987e-06, 'model.decoder.layers.1.self_attn.k_proj.bias': 1.5926332475046934e-10, 'model.decoder.layers.1.self_attn.v_proj.weight': 0.00025346881011500955, 'model.decoder.layers.1.self_attn.v_proj.bias': 0.0021952157840132713, 'model.decoder.layers.1.self_attn.q_proj.weight': 1.818699274735991e-05, 'model.decoder.layers.1.self_attn.q_proj.bias': 0.0004335007688496262, 'model.decoder.layers.1.self_attn.out_proj.weight': 1.7706663129501976e-05, 'model.decoder.layers.1.self_attn.out_proj.bias': 0.010794132947921753, 
'model.decoder.layers.1.self_attn_layer_norm.weight': 0.004621668718755245, 'model.decoder.layers.1.self_attn_layer_norm.bias': 0.006846331059932709, 'model.decoder.layers.1.fc1.weight': 5.2937732107238844e-05, 'model.decoder.layers.1.fc1.bias': 0.0014727330999448895, 'model.decoder.layers.1.fc2.weight': 7.398500201816205e-06, 'model.decoder.layers.1.fc2.bias': 0.03257054463028908, 'model.decoder.layers.1.final_layer_norm.weight': 0.0019106791587546468, 'model.decoder.layers.1.final_layer_norm.bias': 0.0022776746191084385, 'model.decoder.layers.2.self_attn.k_proj.weight': 6.018878593749832e-06, 'model.decoder.layers.2.self_attn.k_proj.bias': 2.9858293615347975e-10, 'model.decoder.layers.2.self_attn.v_proj.weight': 7.255269156303257e-05, 'model.decoder.layers.2.self_attn.v_proj.bias': 0.0014476566575467587, 'model.decoder.layers.2.self_attn.q_proj.weight': 6.528708581754472e-06, 'model.decoder.layers.2.self_attn.q_proj.bias': 0.001365584321320057, 'model.decoder.layers.2.self_attn.out_proj.weight': 1.976549538085237e-05, 'model.decoder.layers.2.self_attn.out_proj.bias': 0.009294230490922928, 'model.decoder.layers.2.self_attn_layer_norm.weight': 0.000535756116732955, 'model.decoder.layers.2.self_attn_layer_norm.bias': 0.001856120303273201, 'model.decoder.layers.2.fc1.weight': 3.177683538524434e-05, 'model.decoder.layers.2.fc1.bias': 2.412831236142665e-05, 'model.decoder.layers.2.fc2.weight': 1.818887085391907e-07, 'model.decoder.layers.2.fc2.bias': 0.022226765751838684, 'model.decoder.layers.2.final_layer_norm.weight': 0.007943467237055302, 'model.decoder.layers.2.final_layer_norm.bias': 0.0022404014598578215, 'model.decoder.layers.3.self_attn.k_proj.weight': 1.151437663793331e-05, 'model.decoder.layers.3.self_attn.k_proj.bias': 3.3940850130420586e-10, 'model.decoder.layers.3.self_attn.v_proj.weight': 4.8154910473385826e-05, 'model.decoder.layers.3.self_attn.v_proj.bias': 0.01699797995388508, 'model.decoder.layers.3.self_attn.q_proj.weight': 2.3673514078836888e-05, 
'model.decoder.layers.3.self_attn.q_proj.bias': 0.0005298053729347885, 'model.decoder.layers.3.self_attn.out_proj.weight': 2.1154875867068768e-05, 'model.decoder.layers.3.self_attn.out_proj.bias': 0.0014614267274737358, 'model.decoder.layers.3.self_attn_layer_norm.weight': 0.005598359275609255, 'model.decoder.layers.3.self_attn_layer_norm.bias': 0.000933143135625869, 'model.decoder.layers.3.fc1.weight': 4.553344115265645e-05, 'model.decoder.layers.3.fc1.bias': 0.0012870586942881346, 'model.decoder.layers.3.fc2.weight': 1.06591141957324e-05, 'model.decoder.layers.3.fc2.bias': 0.012304415926337242, 'model.decoder.layers.3.final_layer_norm.weight': 0.0007680617272853851, 'model.decoder.layers.3.final_layer_norm.bias': 0.005956999026238918, 'model.decoder.layers.4.self_attn.k_proj.weight': 2.699195647437591e-06, 'model.decoder.layers.4.self_attn.k_proj.bias': 7.5040951230676e-10, 'model.decoder.layers.4.self_attn.v_proj.weight': 0.00021728238789364696, 'model.decoder.layers.4.self_attn.v_proj.bias': 0.0034294729121029377, 'model.decoder.layers.4.self_attn.q_proj.weight': 4.4106283894507214e-05, 'model.decoder.layers.4.self_attn.q_proj.bias': 0.0004717133124358952, 'model.decoder.layers.4.self_attn.out_proj.weight': 3.1982865039026365e-05, 'model.decoder.layers.4.self_attn.out_proj.bias': 0.04290454462170601, 'model.decoder.layers.4.self_attn_layer_norm.weight': 0.0024552810937166214, 'model.decoder.layers.4.self_attn_layer_norm.bias': 0.0026056580245494843, 'model.decoder.layers.4.fc1.weight': 3.265196210122667e-05, 'model.decoder.layers.4.fc1.bias': 0.0013350797817111015, 'model.decoder.layers.4.fc2.weight': 6.125213985797018e-06, 'model.decoder.layers.4.fc2.bias': 0.0204729363322258, 'model.decoder.layers.4.final_layer_norm.weight': 0.006313554476946592, 'model.decoder.layers.4.final_layer_norm.bias': 0.0018668060656636953, 'model.decoder.layers.5.self_attn.k_proj.weight': 1.9550689103198238e-05, 'model.decoder.layers.5.self_attn.k_proj.bias': 4.5481396426794163e-10, 
'model.decoder.layers.5.self_attn.v_proj.weight': 0.0002496642409823835, 'model.decoder.layers.5.self_attn.v_proj.bias': 0.002697425428777933, 'model.decoder.layers.5.self_attn.q_proj.weight': 1.265824175789021e-05, 'model.decoder.layers.5.self_attn.q_proj.bias': 0.0002674320712685585, 'model.decoder.layers.5.self_attn.out_proj.weight': 0.00011045326391467825, 'model.decoder.layers.5.self_attn.out_proj.bias': 0.014382628723978996, 'model.decoder.layers.5.self_attn_layer_norm.weight': 0.001466980203986168, 'model.decoder.layers.5.self_attn_layer_norm.bias': 0.0034044026397168636, 'model.decoder.layers.5.fc1.weight': 5.873205282114213e-06, 'model.decoder.layers.5.fc1.bias': 8.20957066025585e-05, 'model.decoder.layers.5.fc2.weight': 4.40867825091118e-06, 'model.decoder.layers.5.fc2.bias': 0.018928296864032745, 'model.decoder.layers.5.final_layer_norm.weight': 0.0011564877349883318, 'model.decoder.layers.5.final_layer_norm.bias': 0.0016948427073657513, 'model.decoder.layers.6.self_attn.k_proj.weight': 1.5418418115586974e-06, 'model.decoder.layers.6.self_attn.k_proj.bias': 2.9045921223769255e-10, 'model.decoder.layers.6.self_attn.v_proj.weight': 0.0007602623663842678, 'model.decoder.layers.6.self_attn.v_proj.bias': 0.011044119484722614, 'model.decoder.layers.6.self_attn.q_proj.weight': 2.2584390535485e-07, 'model.decoder.layers.6.self_attn.q_proj.bias': 0.0008485889993607998, 'model.decoder.layers.6.self_attn.out_proj.weight': 3.578695759642869e-05, 'model.decoder.layers.6.self_attn.out_proj.bias': 0.008568225428462029, 'model.decoder.layers.6.self_attn_layer_norm.weight': 0.00713425362482667, 'model.decoder.layers.6.self_attn_layer_norm.bias': 0.014377156272530556, 'model.decoder.layers.6.fc1.weight': 4.1227933252230287e-05, 'model.decoder.layers.6.fc1.bias': 0.00033436791272833943, 'model.decoder.layers.6.fc2.weight': 6.13882457400905e-06, 'model.decoder.layers.6.fc2.bias': 0.018310433253645897, 'model.decoder.layers.6.final_layer_norm.weight': 0.0007947176927700639, 
'model.decoder.layers.6.final_layer_norm.bias': 0.004018186591565609, 'model.decoder.layers.7.self_attn.k_proj.weight': 1.8655551684787497e-05, 'model.decoder.layers.7.self_attn.k_proj.bias': 4.4433434709389985e-10, 'model.decoder.layers.7.self_attn.v_proj.weight': 0.0004696718533523381, 'model.decoder.layers.7.self_attn.v_proj.bias': 0.010697084479033947, 'model.decoder.layers.7.self_attn.q_proj.weight': 1.0651410775608383e-05, 'model.decoder.layers.7.self_attn.q_proj.bias': 0.0009541820036247373, 'model.decoder.layers.7.self_attn.out_proj.weight': 2.0471183233894408e-05, 'model.decoder.layers.7.self_attn.out_proj.bias': 0.04106755927205086, 'model.decoder.layers.7.self_attn_layer_norm.weight': 0.003637113608419895, 'model.decoder.layers.7.self_attn_layer_norm.bias': 0.003511386923491955, 'model.decoder.layers.7.fc1.weight': 3.541999467415735e-05, 'model.decoder.layers.7.fc1.bias': 0.00017271609976887703, 'model.decoder.layers.7.fc2.weight': 6.586087692994624e-07, 'model.decoder.layers.7.fc2.bias': 0.0007914276793599129, 'model.decoder.layers.7.final_layer_norm.weight': 0.0016061868518590927, 'model.decoder.layers.7.final_layer_norm.bias': 0.0006164757069200277, 'model.decoder.layers.8.self_attn.k_proj.weight': 1.312786935159238e-05, 'model.decoder.layers.8.self_attn.k_proj.bias': 4.789022511886287e-10, 'model.decoder.layers.8.self_attn.v_proj.weight': 0.00083456642460078, 'model.decoder.layers.8.self_attn.v_proj.bias': 0.0013190273893997073, 'model.decoder.layers.8.self_attn.q_proj.weight': 4.527746204985306e-05, 'model.decoder.layers.8.self_attn.q_proj.bias': 0.0003894694964401424, 'model.decoder.layers.8.self_attn.out_proj.weight': 8.155950126820244e-06, 'model.decoder.layers.8.self_attn.out_proj.bias': 0.031470075249671936, 'model.decoder.layers.8.self_attn_layer_norm.weight': 0.0021222857758402824, 'model.decoder.layers.8.self_attn_layer_norm.bias': 0.0019304797751829028, 'model.decoder.layers.8.fc1.weight': 2.3366730601992458e-05, 
'model.decoder.layers.8.fc1.bias': 0.0016706563765183091, 'model.decoder.layers.8.fc2.weight': 1.011752010526834e-05, 'model.decoder.layers.8.fc2.bias': 0.015386030077934265, 'model.decoder.layers.8.final_layer_norm.weight': 0.004658102057874203, 'model.decoder.layers.8.final_layer_norm.bias': 0.006879071705043316, 'model.decoder.layers.9.self_attn.k_proj.weight': 1.7018075595842674e-05, 'model.decoder.layers.9.self_attn.k_proj.bias': 7.391953715796262e-11, 'model.decoder.layers.9.self_attn.v_proj.weight': 0.0006141822086647153, 'model.decoder.layers.9.self_attn.v_proj.bias': 0.004593817517161369, 'model.decoder.layers.9.self_attn.q_proj.weight': 2.1341147657949477e-05, 'model.decoder.layers.9.self_attn.q_proj.bias': 0.0019551655277609825, 'model.decoder.layers.9.self_attn.out_proj.weight': 5.762240107287653e-05, 'model.decoder.layers.9.self_attn.out_proj.bias': 0.008541066199541092, 'model.decoder.layers.9.self_attn_layer_norm.weight': 0.00260370085015893, 'model.decoder.layers.9.self_attn_layer_norm.bias': 0.002429370302706957, 'model.decoder.layers.9.fc1.weight': 1.5558865925413556e-05, 'model.decoder.layers.9.fc1.bias': 0.0016150367446243763, 'model.decoder.layers.9.fc2.weight': 1.7371028661727905e-05, 'model.decoder.layers.9.fc2.bias': 0.0010262508876621723, 'model.decoder.layers.9.final_layer_norm.weight': 0.0006154445582069457, 'model.decoder.layers.9.final_layer_norm.bias': 0.004642384126782417, 'model.decoder.layers.10.self_attn.k_proj.weight': 1.7195801774505526e-05, 'model.decoder.layers.10.self_attn.k_proj.bias': 7.461231632532872e-11, 'model.decoder.layers.10.self_attn.v_proj.weight': 0.0006180479540489614, 'model.decoder.layers.10.self_attn.v_proj.bias': 0.0049537960439920425, 'model.decoder.layers.10.self_attn.q_proj.weight': 1.5819550753803924e-06, 'model.decoder.layers.10.self_attn.q_proj.bias': 0.0015959527809172869, 'model.decoder.layers.10.self_attn.out_proj.weight': 7.227572496049106e-05, 'model.decoder.layers.10.self_attn.out_proj.bias': 
0.0029820981435477734, 'model.decoder.layers.10.self_attn_layer_norm.weight': 0.005492689553648233, 'model.decoder.layers.10.self_attn_layer_norm.bias': 0.0049917567521333694, 'model.decoder.layers.10.fc1.weight': 1.7441607269574888e-06, 'model.decoder.layers.10.fc1.bias': 0.000596581376157701, 'model.decoder.layers.10.fc2.weight': 8.243401680374518e-06, 'model.decoder.layers.10.fc2.bias': 0.0008291625417768955, 'model.decoder.layers.10.final_layer_norm.weight': 0.0004895663587376475, 'model.decoder.layers.10.final_layer_norm.bias': 0.005814571864902973, 'model.decoder.layers.11.self_attn.k_proj.weight': 1.6726547983125784e-05, 'model.decoder.layers.11.self_attn.k_proj.bias': 7.352944919603033e-10, 'model.decoder.layers.11.self_attn.v_proj.weight': 0.0005792105803266168, 'model.decoder.layers.11.self_attn.v_proj.bias': 0.0014454503543674946, 'model.decoder.layers.11.self_attn.q_proj.weight': 1.984708069358021e-05, 'model.decoder.layers.11.self_attn.q_proj.bias': 0.0017196566332131624, 'model.decoder.layers.11.self_attn.out_proj.weight': 7.973126048455015e-05, 'model.decoder.layers.11.self_attn.out_proj.bias': 0.0002526906318962574, 'model.decoder.layers.11.self_attn_layer_norm.weight': 0.0006384402513504028, 'model.decoder.layers.11.self_attn_layer_norm.bias': 0.014260699972510338, 'model.decoder.layers.11.fc1.weight': 3.445312177063897e-05, 'model.decoder.layers.11.fc1.bias': 0.00018588214879855514, 'model.decoder.layers.11.fc2.weight': 2.20234619519033e-06, 'model.decoder.layers.11.fc2.bias': 0.023138443008065224, 'model.decoder.layers.11.final_layer_norm.weight': 0.0038702962920069695, 'model.decoder.layers.11.final_layer_norm.bias': 0.0020405559334903955, 'model.decoder.layers.12.self_attn.k_proj.weight': 5.6471722018613946e-06, 'model.decoder.layers.12.self_attn.k_proj.bias': 5.683409298740116e-10, 'model.decoder.layers.12.self_attn.v_proj.weight': 0.0003623973170761019, 'model.decoder.layers.12.self_attn.v_proj.bias': 0.004810386803001165, 
'model.decoder.layers.12.self_attn.q_proj.weight': 2.4387672965531237e-05, 'model.decoder.layers.12.self_attn.q_proj.bias': 0.0006049377843737602, 'model.decoder.layers.12.self_attn.out_proj.weight': 2.0980100089218467e-05, 'model.decoder.layers.12.self_attn.out_proj.bias': 0.0037242495454847813, 'model.decoder.layers.12.self_attn_layer_norm.weight': 0.0002736416645348072, 'model.decoder.layers.12.self_attn_layer_norm.bias': 0.0108397351577878, 'model.decoder.layers.12.fc1.weight': 4.7448615077883005e-05, 'model.decoder.layers.12.fc1.bias': 0.0008975025848485529, 'model.decoder.layers.12.fc2.weight': 2.871520700864494e-05, 'model.decoder.layers.12.fc2.bias': 0.011479119770228863, 'model.decoder.layers.12.final_layer_norm.weight': 0.0033557522110641003, 'model.decoder.layers.12.final_layer_norm.bias': 0.004673722665756941, 'model.decoder.layers.13.self_attn.k_proj.weight': 0.00011653665569610894, 'model.decoder.layers.13.self_attn.k_proj.bias': 1.7158043874587747e-09, 'model.decoder.layers.13.self_attn.v_proj.weight': 0.0002871536125894636, 'model.decoder.layers.13.self_attn.v_proj.bias': 0.0035168619360774755, 'model.decoder.layers.13.self_attn.q_proj.weight': 4.61002164229285e-05, 'model.decoder.layers.13.self_attn.q_proj.bias': 0.0004209924372844398, 'model.decoder.layers.13.self_attn.out_proj.weight': 4.131090099690482e-05, 'model.decoder.layers.13.self_attn.out_proj.bias': 0.00010073347948491573, 'model.decoder.layers.13.self_attn_layer_norm.weight': 0.003654373809695244, 'model.decoder.layers.13.self_attn_layer_norm.bias': 0.005862885154783726, 'model.decoder.layers.13.fc1.weight': 4.259213892510161e-05, 'model.decoder.layers.13.fc1.bias': 0.00028196259518153965, 'model.decoder.layers.13.fc2.weight': 1.934450892804307e-06, 'model.decoder.layers.13.fc2.bias': 0.006977382116019726, 'model.decoder.layers.13.final_layer_norm.weight': 0.004070748575031757, 'model.decoder.layers.13.final_layer_norm.bias': 0.0012171454727649689, 
'model.decoder.layers.14.self_attn.k_proj.weight': 2.5059925974346697e-05, 'model.decoder.layers.14.self_attn.k_proj.bias': 7.952873914973679e-10, 'model.decoder.layers.14.self_attn.v_proj.weight': 9.57314478000626e-05, 'model.decoder.layers.14.self_attn.v_proj.bias': 0.009776213206350803, 'model.decoder.layers.14.self_attn.q_proj.weight': 9.453647180635016e-06, 'model.decoder.layers.14.self_attn.q_proj.bias': 3.741658292710781e-05, 'model.decoder.layers.14.self_attn.out_proj.weight': 4.4570078898686916e-05, 'model.decoder.layers.14.self_attn.out_proj.bias': 0.004945332184433937, 'model.decoder.layers.14.self_attn_layer_norm.weight': 0.0027952108066529036, 'model.decoder.layers.14.self_attn_layer_norm.bias': 0.0017658316064625978, 'model.decoder.layers.14.fc1.weight': 2.2840002202428877e-05, 'model.decoder.layers.14.fc1.bias': 0.00041145997238345444, 'model.decoder.layers.14.fc2.weight': 9.089013474294916e-06, 'model.decoder.layers.14.fc2.bias': 0.0013153913896530867, 'model.decoder.layers.14.final_layer_norm.weight': 0.00015770667232573032, 'model.decoder.layers.14.final_layer_norm.bias': 0.0003043359611183405, 'model.decoder.layers.15.self_attn.k_proj.weight': 1.691165925876703e-05, 'model.decoder.layers.15.self_attn.k_proj.bias': 1.7975807509174047e-09, 'model.decoder.layers.15.self_attn.v_proj.weight': 6.160035263746977e-05, 'model.decoder.layers.15.self_attn.v_proj.bias': 0.004183325916528702, 'model.decoder.layers.15.self_attn.q_proj.weight': 9.07519097381737e-06, 'model.decoder.layers.15.self_attn.q_proj.bias': 0.001580888987518847, 'model.decoder.layers.15.self_attn.out_proj.weight': 4.506875484366901e-06, 'model.decoder.layers.15.self_attn.out_proj.bias': 0.00036701885983347893, 'model.decoder.layers.15.self_attn_layer_norm.weight': 0.0014215430710464716, 'model.decoder.layers.15.self_attn_layer_norm.bias': 0.000428510713391006, 'model.decoder.layers.15.fc1.weight': 8.200007869163528e-06, 'model.decoder.layers.15.fc1.bias': 0.0004673587391152978, 
'model.decoder.layers.15.fc2.weight': 9.591492016625125e-06, 'model.decoder.layers.15.fc2.bias': 0.008664260618388653, 'model.decoder.layers.15.final_layer_norm.weight': 0.00045877337106503546, 'model.decoder.layers.15.final_layer_norm.bias': 3.923231270164251e-05, 'model.decoder.layers.16.self_attn.k_proj.weight': 8.037301995500457e-06, 'model.decoder.layers.16.self_attn.k_proj.bias': 3.4562219752842793e-10, 'model.decoder.layers.16.self_attn.v_proj.weight': 0.0003743779961951077, 'model.decoder.layers.16.self_attn.v_proj.bias': 0.005785172805190086, 'model.decoder.layers.16.self_attn.q_proj.weight': 2.8230115276528522e-05, 'model.decoder.layers.16.self_attn.q_proj.bias': 0.00019174485350959003, 'model.decoder.layers.16.self_attn.out_proj.weight': 1.0935635145870037e-05, 'model.decoder.layers.16.self_attn.out_proj.bias': 0.003971535246819258, 'model.decoder.layers.16.self_attn_layer_norm.weight': 0.00021049194037914276, 'model.decoder.layers.16.self_attn_layer_norm.bias': 0.0005579772405326366, 'model.decoder.layers.16.fc1.weight': 1.307657021243358e-05, 'model.decoder.layers.16.fc1.bias': 0.00021872477373108268, 'model.decoder.layers.16.fc2.weight': 9.013900125864893e-06, 'model.decoder.layers.16.fc2.bias': 0.006086578592658043, 'model.decoder.layers.16.final_layer_norm.weight': 0.000611298019066453, 'model.decoder.layers.16.final_layer_norm.bias': 0.0002765201497823, 'model.decoder.layers.17.self_attn.k_proj.weight': 2.1650248527294025e-05, 'model.decoder.layers.17.self_attn.k_proj.bias': 1.0046452558754027e-10, 'model.decoder.layers.17.self_attn.v_proj.weight': 0.00011780836939578876, 'model.decoder.layers.17.self_attn.v_proj.bias': 0.009902569465339184, 'model.decoder.layers.17.self_attn.q_proj.weight': 9.811848940444179e-06, 'model.decoder.layers.17.self_attn.q_proj.bias': 0.0010778481373563409, 'model.decoder.layers.17.self_attn.out_proj.weight': 3.1169627618510276e-05, 'model.decoder.layers.17.self_attn.out_proj.bias': 0.0014936437364667654, 
'model.decoder.layers.17.self_attn_layer_norm.weight': 0.0011217626743018627, 'model.decoder.layers.17.self_attn_layer_norm.bias': 0.005995844025164843, 'model.decoder.layers.17.fc1.weight': 1.495193373557413e-05, 'model.decoder.layers.17.fc1.bias': 8.811787120066583e-05, 'model.decoder.layers.17.fc2.weight': 5.135842684467207e-07, 'model.decoder.layers.17.fc2.bias': 0.004266152158379555, 'model.decoder.layers.17.final_layer_norm.weight': 0.0015217175241559744, 'model.decoder.layers.17.final_layer_norm.bias': 0.0006807037862017751, 'model.decoder.layers.18.self_attn.k_proj.weight': 8.06709340395173e-06, 'model.decoder.layers.18.self_attn.k_proj.bias': 2.382538610845586e-10, 'model.decoder.layers.18.self_attn.v_proj.weight': 0.00010729266796261072, 'model.decoder.layers.18.self_attn.v_proj.bias': 0.0002375934855081141, 'model.decoder.layers.18.self_attn.q_proj.weight': 1.3824549569108058e-05, 'model.decoder.layers.18.self_attn.q_proj.bias': 0.0008267344674095511, 'model.decoder.layers.18.self_attn.out_proj.weight': 4.728955536847934e-05, 'model.decoder.layers.18.self_attn.out_proj.bias': 0.00023239030269905925, 'model.decoder.layers.18.self_attn_layer_norm.weight': 0.0017888545989990234, 'model.decoder.layers.18.self_attn_layer_norm.bias': 0.0026490602176636457, 'model.decoder.layers.18.fc1.weight': 2.582679189799819e-05, 'model.decoder.layers.18.fc1.bias': 1.660519774304703e-05, 'model.decoder.layers.18.fc2.weight': 7.1948597906157374e-06, 'model.decoder.layers.18.fc2.bias': 0.0015473762759938836, 'model.decoder.layers.18.final_layer_norm.weight': 9.034632239490747e-05, 'model.decoder.layers.18.final_layer_norm.bias': 0.0019304485758766532, 'model.decoder.layers.19.self_attn.k_proj.weight': 2.0197403500787914e-05, 'model.decoder.layers.19.self_attn.k_proj.bias': 1.303785968076454e-10, 'model.decoder.layers.19.self_attn.v_proj.weight': 0.0002107986219925806, 'model.decoder.layers.19.self_attn.v_proj.bias': 0.0008111624629236758, 
'model.decoder.layers.19.self_attn.q_proj.weight': 1.4097247913014144e-05, 'model.decoder.layers.19.self_attn.q_proj.bias': 4.587244620779529e-05, 'model.decoder.layers.19.self_attn.out_proj.weight': 3.4978158510057256e-05, 'model.decoder.layers.19.self_attn.out_proj.bias': 0.0005899042589589953, 'model.decoder.layers.19.self_attn_layer_norm.weight': 0.0013289607595652342, 'model.decoder.layers.19.self_attn_layer_norm.bias': 0.0023845075629651546, 'model.decoder.layers.19.fc1.weight': 1.2554923159768805e-05, 'model.decoder.layers.19.fc1.bias': 0.0001672703365329653, 'model.decoder.layers.19.fc2.weight': 6.046268026693724e-06, 'model.decoder.layers.19.fc2.bias': 0.002383989281952381, 'model.decoder.layers.19.final_layer_norm.weight': 0.0005191607633605599, 'model.decoder.layers.19.final_layer_norm.bias': 0.0011610982473939657, 'model.decoder.layers.20.self_attn.k_proj.weight': 2.1556634237640537e-05, 'model.decoder.layers.20.self_attn.k_proj.bias': 1.950688499618991e-10, 'model.decoder.layers.20.self_attn.v_proj.weight': 6.871994264656678e-05, 'model.decoder.layers.20.self_attn.v_proj.bias': 0.0016091763973236084, 'model.decoder.layers.20.self_attn.q_proj.weight': 1.1541615094756708e-05, 'model.decoder.layers.20.self_attn.q_proj.bias': 0.0002420599339529872, 'model.decoder.layers.20.self_attn.out_proj.weight': 6.485556696134154e-06, 'model.decoder.layers.20.self_attn.out_proj.bias': 0.0017362730577588081, 'model.decoder.layers.20.self_attn_layer_norm.weight': 0.000966782565228641, 'model.decoder.layers.20.self_attn_layer_norm.bias': 0.0024521793238818645, 'model.decoder.layers.20.fc1.weight': 2.220020178356208e-06, 'model.decoder.layers.20.fc1.bias': 0.00023903901455923915, 'model.decoder.layers.20.fc2.weight': 5.11652751811198e-06, 'model.decoder.layers.20.fc2.bias': 0.001610452076420188, 'model.decoder.layers.20.final_layer_norm.weight': 0.0006391120259650052, 'model.decoder.layers.20.final_layer_norm.bias': 0.00036247906973585486, 
'model.decoder.layers.21.self_attn.k_proj.weight': 2.875465725082904e-06, 'model.decoder.layers.21.self_attn.k_proj.bias': 1.0963829844001793e-10, 'model.decoder.layers.21.self_attn.v_proj.weight': 0.0001076810949598439, 'model.decoder.layers.21.self_attn.v_proj.bias': 0.0012785817962139845, 'model.decoder.layers.21.self_attn.q_proj.weight': 5.267726010060869e-06, 'model.decoder.layers.21.self_attn.q_proj.bias': 0.0005636128480546176, 'model.decoder.layers.21.self_attn.out_proj.weight': 1.0467017091286834e-05, 'model.decoder.layers.21.self_attn.out_proj.bias': 0.0006349491886794567, 'model.decoder.layers.21.self_attn_layer_norm.weight': 9.788910392671824e-05, 'model.decoder.layers.21.self_attn_layer_norm.bias': 0.0018109140219166875, 'model.decoder.layers.21.fc1.weight': 1.0524420758883934e-05, 'model.decoder.layers.21.fc1.bias': 0.0001395009458065033, 'model.decoder.layers.21.fc2.weight': 7.825683496776037e-07, 'model.decoder.layers.21.fc2.bias': 0.00027287568082101643, 'model.decoder.layers.21.final_layer_norm.weight': 0.002189337508752942, 'model.decoder.layers.21.final_layer_norm.bias': 0.0003936631546821445, 'model.decoder.layers.22.self_attn.k_proj.weight': 7.157395884860307e-06, 'model.decoder.layers.22.self_attn.k_proj.bias': 1.2284405714879654e-09, 'model.decoder.layers.22.self_attn.v_proj.weight': 0.0001079331268556416, 'model.decoder.layers.22.self_attn.v_proj.bias': 0.0024641603231430054, 'model.decoder.layers.22.self_attn.q_proj.weight': 2.9348946554819122e-05, 'model.decoder.layers.22.self_attn.q_proj.bias': 0.00046467551146633923, 'model.decoder.layers.22.self_attn.out_proj.weight': 1.1630811059148982e-05, 'model.decoder.layers.22.self_attn.out_proj.bias': 1.3318858691491187e-05, 'model.decoder.layers.22.self_attn_layer_norm.weight': 0.0024139797315001488, 'model.decoder.layers.22.self_attn_layer_norm.bias': 0.0008520184201188385, 'model.decoder.layers.22.fc1.weight': 1.299438372370787e-05, 'model.decoder.layers.22.fc1.bias': 6.156826566439122e-05, 
'model.decoder.layers.22.fc2.weight': 6.837515229562996e-06, 'model.decoder.layers.22.fc2.bias': 0.000750359205994755, 'model.decoder.layers.22.final_layer_norm.weight': 0.0005745317903347313, 'model.decoder.layers.22.final_layer_norm.bias': 0.00038529568701051176, 'model.decoder.layers.23.self_attn.k_proj.weight': 0.0008154936949722469, 'model.decoder.layers.23.self_attn.k_proj.bias': 1.5340060599555727e-07, 'model.decoder.layers.23.self_attn.v_proj.weight': 1.653862273087725e-05, 'model.decoder.layers.23.self_attn.v_proj.bias': 0.0008790317224338651, 'model.decoder.layers.23.self_attn.q_proj.weight': 9.975660759664606e-06, 'model.decoder.layers.23.self_attn.q_proj.bias': 0.0004680745187215507, 'model.decoder.layers.23.self_attn.out_proj.weight': 2.2089188860263675e-06, 'model.decoder.layers.23.self_attn.out_proj.bias': 0.00054021121468395, 'model.decoder.layers.23.self_attn_layer_norm.weight': 0.05621916800737381, 'model.decoder.layers.23.self_attn_layer_norm.bias': 0.0024355368223041296, 'model.decoder.layers.23.fc1.weight': 1.85953795153182e-05, 'model.decoder.layers.23.fc1.bias': 9.960689203580841e-05, 'model.decoder.layers.23.fc2.weight': 1.5280495517799864e-06, 'model.decoder.layers.23.fc2.bias': 0.0008676279685460031, 'model.decoder.layers.23.final_layer_norm.weight': 0.0021101871971040964, 'model.decoder.layers.23.final_layer_norm.bias': 0.0014080661348998547}\n",
    "hessian_trace =  {'model.decoder.embed_tokens.weight': 1554.5933227539062, 'model.decoder.embed_positions.weight': 329.3562469482422, 'model.decoder.final_layer_norm.weight': 2.057377278804779, 'model.decoder.final_layer_norm.bias': 2.215784430503845, 'model.decoder.layers.0.self_attn.k_proj.weight': 5.959098815917969, 'model.decoder.layers.0.self_attn.k_proj.bias': 1.7815805222198833e-06, 'model.decoder.layers.0.self_attn.v_proj.weight': 155.4822769165039, 'model.decoder.layers.0.self_attn.v_proj.bias': 42.346364974975586, 'model.decoder.layers.0.self_attn.q_proj.weight': 36.097707748413086, 'model.decoder.layers.0.self_attn.q_proj.bias': 0.7161131501197815, 'model.decoder.layers.0.self_attn.out_proj.weight': 578.2647705078125, 'model.decoder.layers.0.self_attn.out_proj.bias': 24.615886688232422, 'model.decoder.layers.0.self_attn_layer_norm.weight': 5.630899906158447, 'model.decoder.layers.0.self_attn_layer_norm.bias': 3.613467216491699, 'model.decoder.layers.0.fc1.weight': 1407.4449996948242, 'model.decoder.layers.0.fc1.bias': 13.40324592590332, 'model.decoder.layers.0.fc2.weight': 432.50433349609375, 'model.decoder.layers.0.fc2.bias': 41.77509117126465, 'model.decoder.layers.0.final_layer_norm.weight': 6.353864669799805, 'model.decoder.layers.0.final_layer_norm.bias': 5.838199853897095, 'model.decoder.layers.1.self_attn.k_proj.weight': 5.609800338745117, 'model.decoder.layers.1.self_attn.k_proj.bias': 3.0752916302390076e-07, 'model.decoder.layers.1.self_attn.v_proj.weight': 1947.972412109375, 'model.decoder.layers.1.self_attn.v_proj.bias': 11.536206245422363, 'model.decoder.layers.1.self_attn.q_proj.weight': 58.153066635131836, 'model.decoder.layers.1.self_attn.q_proj.bias': 0.08871698379516602, 'model.decoder.layers.1.self_attn.out_proj.weight': 122.70708847045898, 'model.decoder.layers.1.self_attn.out_proj.bias': 36.88761901855469, 'model.decoder.layers.1.self_attn_layer_norm.weight': 1.7729547023773193, 'model.decoder.layers.1.self_attn_layer_norm.bias': 
4.077762126922607, 'model.decoder.layers.1.fc1.weight': 53.148040771484375, 'model.decoder.layers.1.fc1.bias': 11.792776107788086, 'model.decoder.layers.1.fc2.weight': 112.98832321166992, 'model.decoder.layers.1.fc2.bias': 14.23758602142334, 'model.decoder.layers.1.final_layer_norm.weight': 6.522412419319153, 'model.decoder.layers.1.final_layer_norm.bias': 2.1635074615478516, 'model.decoder.layers.2.self_attn.k_proj.weight': 2.582994222640991, 'model.decoder.layers.2.self_attn.k_proj.bias': 1.1190908821845369e-07, 'model.decoder.layers.2.self_attn.v_proj.weight': 1141.974609375, 'model.decoder.layers.2.self_attn.v_proj.bias': 12.205239653587341, 'model.decoder.layers.2.self_attn.q_proj.weight': 40.1053352355957, 'model.decoder.layers.2.self_attn.q_proj.bias': 0.07917895913124084, 'model.decoder.layers.2.self_attn.out_proj.weight': 35.13606929779053, 'model.decoder.layers.2.self_attn.out_proj.bias': 22.22556781768799, 'model.decoder.layers.2.self_attn_layer_norm.weight': 2.9950828552246094, 'model.decoder.layers.2.self_attn_layer_norm.bias': 3.671904444694519, 'model.decoder.layers.2.fc1.weight': 209.83714294433594, 'model.decoder.layers.2.fc1.bias': 1.4389972984790802, 'model.decoder.layers.2.fc2.weight': 16.237722396850586, 'model.decoder.layers.2.fc2.bias': 11.905595541000366, 'model.decoder.layers.2.final_layer_norm.weight': 5.728177070617676, 'model.decoder.layers.2.final_layer_norm.bias': 3.456906110048294, 'model.decoder.layers.3.self_attn.k_proj.weight': 23.879831314086914, 'model.decoder.layers.3.self_attn.k_proj.bias': 6.522961371047131e-07, 'model.decoder.layers.3.self_attn.v_proj.weight': 1312.0902099609375, 'model.decoder.layers.3.self_attn.v_proj.bias': 2.9325156211853027, 'model.decoder.layers.3.self_attn.q_proj.weight': 11.476224899291992, 'model.decoder.layers.3.self_attn.q_proj.bias': 0.12640252709388733, 'model.decoder.layers.3.self_attn.out_proj.weight': 4.092090606689453, 'model.decoder.layers.3.self_attn.out_proj.bias': 8.711282014846802, 
'model.decoder.layers.3.self_attn_layer_norm.weight': 3.1589415073394775, 'model.decoder.layers.3.self_attn_layer_norm.bias': 0.9726977348327637, 'model.decoder.layers.3.fc1.weight': 268.2931594848633, 'model.decoder.layers.3.fc1.bias': 3.855536460876465, 'model.decoder.layers.3.fc2.weight': 9.389429092407227, 'model.decoder.layers.3.fc2.bias': 18.506755828857422, 'model.decoder.layers.3.final_layer_norm.weight': 1.3052382469177246, 'model.decoder.layers.3.final_layer_norm.bias': 5.370193719863892, 'model.decoder.layers.4.self_attn.k_proj.weight': 31.703463554382324, 'model.decoder.layers.4.self_attn.k_proj.bias': 7.028519064533612e-07, 'model.decoder.layers.4.self_attn.v_proj.weight': 1321.0108947753906, 'model.decoder.layers.4.self_attn.v_proj.bias': 3.770785927772522, 'model.decoder.layers.4.self_attn.q_proj.weight': 54.640886306762695, 'model.decoder.layers.4.self_attn.q_proj.bias': 0.9349321722984314, 'model.decoder.layers.4.self_attn.out_proj.weight': 41.241634368896484, 'model.decoder.layers.4.self_attn.out_proj.bias': 2.445488452911377, 'model.decoder.layers.4.self_attn_layer_norm.weight': 3.169499397277832, 'model.decoder.layers.4.self_attn_layer_norm.bias': 4.908833980560303, 'model.decoder.layers.4.fc1.weight': 85.29711151123047, 'model.decoder.layers.4.fc1.bias': 5.859738707542419, 'model.decoder.layers.4.fc2.weight': 45.9865403175354, 'model.decoder.layers.4.fc2.bias': 6.12828803062439, 'model.decoder.layers.4.final_layer_norm.weight': 3.5777411460876465, 'model.decoder.layers.4.final_layer_norm.bias': 4.706707000732422, 'model.decoder.layers.5.self_attn.k_proj.weight': 24.822002410888672, 'model.decoder.layers.5.self_attn.k_proj.bias': 3.834725674778383e-07, 'model.decoder.layers.5.self_attn.v_proj.weight': 1820.896728515625, 'model.decoder.layers.5.self_attn.v_proj.bias': 2.874998927116394, 'model.decoder.layers.5.self_attn.q_proj.weight': 41.00781059265137, 'model.decoder.layers.5.self_attn.q_proj.bias': 0.01809464395046234, 
'model.decoder.layers.5.self_attn.out_proj.weight': 89.42065858840942, 'model.decoder.layers.5.self_attn.out_proj.bias': 0.03385138511657715, 'model.decoder.layers.5.self_attn_layer_norm.weight': 0.5889009237289429, 'model.decoder.layers.5.self_attn_layer_norm.bias': 5.95812052488327, 'model.decoder.layers.5.fc1.weight': 459.3031005859375, 'model.decoder.layers.5.fc1.bias': 2.7095339596271515, 'model.decoder.layers.5.fc2.weight': 62.684226989746094, 'model.decoder.layers.5.fc2.bias': 1.5516939163208008, 'model.decoder.layers.5.final_layer_norm.weight': 0.14291080832481384, 'model.decoder.layers.5.final_layer_norm.bias': 6.614803314208984, 'model.decoder.layers.6.self_attn.k_proj.weight': 18.307182788848877, 'model.decoder.layers.6.self_attn.k_proj.bias': 1.5350670423686097e-07, 'model.decoder.layers.6.self_attn.v_proj.weight': 3089.83056640625, 'model.decoder.layers.6.self_attn.v_proj.bias': 4.662912368774414, 'model.decoder.layers.6.self_attn.q_proj.weight': 27.535000801086426, 'model.decoder.layers.6.self_attn.q_proj.bias': 0.03767353296279907, 'model.decoder.layers.6.self_attn.out_proj.weight': 19.38718605041504, 'model.decoder.layers.6.self_attn.out_proj.bias': 16.31098961830139, 'model.decoder.layers.6.self_attn_layer_norm.weight': 0.9867374897003174, 'model.decoder.layers.6.self_attn_layer_norm.bias': 4.605532646179199, 'model.decoder.layers.6.fc1.weight': 333.1764831542969, 'model.decoder.layers.6.fc1.bias': 0.6722406148910522, 'model.decoder.layers.6.fc2.weight': 11.636143684387207, 'model.decoder.layers.6.fc2.bias': 10.138243794441223, 'model.decoder.layers.6.final_layer_norm.weight': 1.0856136828660965, 'model.decoder.layers.6.final_layer_norm.bias': 1.8411765098571777, 'model.decoder.layers.7.self_attn.k_proj.weight': 12.777554035186768, 'model.decoder.layers.7.self_attn.k_proj.bias': 2.133449612529148e-07, 'model.decoder.layers.7.self_attn.v_proj.weight': 3036.2489013671875, 'model.decoder.layers.7.self_attn.v_proj.bias': 3.8926891088485718, 
'model.decoder.layers.7.self_attn.q_proj.weight': 44.53946495056152, 'model.decoder.layers.7.self_attn.q_proj.bias': 0.47235769033432007, 'model.decoder.layers.7.self_attn.out_proj.weight': 121.07401657104492, 'model.decoder.layers.7.self_attn.out_proj.bias': 21.619566917419434, 'model.decoder.layers.7.self_attn_layer_norm.weight': 0.8050994277000427, 'model.decoder.layers.7.self_attn_layer_norm.bias': 0.5536379814147949, 'model.decoder.layers.7.fc1.weight': 430.60791015625, 'model.decoder.layers.7.fc1.bias': 2.4085601568222046, 'model.decoder.layers.7.fc2.weight': 72.63498306274414, 'model.decoder.layers.7.fc2.bias': 4.556353807449341, 'model.decoder.layers.7.final_layer_norm.weight': 0.0022830963134765625, 'model.decoder.layers.7.final_layer_norm.bias': 0.08113458752632141, 'model.decoder.layers.8.self_attn.k_proj.weight': 9.053851246833801, 'model.decoder.layers.8.self_attn.k_proj.bias': 3.628976656955274e-07, 'model.decoder.layers.8.self_attn.v_proj.weight': 2891.9862060546875, 'model.decoder.layers.8.self_attn.v_proj.bias': 10.356960773468018, 'model.decoder.layers.8.self_attn.q_proj.weight': 49.40972137451172, 'model.decoder.layers.8.self_attn.q_proj.bias': 0.9306561648845673, 'model.decoder.layers.8.self_attn.out_proj.weight': 82.90560150146484, 'model.decoder.layers.8.self_attn.out_proj.bias': 2.854060649871826, 'model.decoder.layers.8.self_attn_layer_norm.weight': 2.1702619791030884, 'model.decoder.layers.8.self_attn_layer_norm.bias': 4.5817378759384155, 'model.decoder.layers.8.fc1.weight': 296.97837829589844, 'model.decoder.layers.8.fc1.bias': 0.3790193796157837, 'model.decoder.layers.8.fc2.weight': 75.05359315872192, 'model.decoder.layers.8.fc2.bias': 3.0101819038391113, 'model.decoder.layers.8.final_layer_norm.weight': 1.4613453149795532, 'model.decoder.layers.8.final_layer_norm.bias': 0.2570556402206421, 'model.decoder.layers.9.self_attn.k_proj.weight': 1.9306201934814453, 'model.decoder.layers.9.self_attn.k_proj.bias': 2.727692560711148e-07, 
'model.decoder.layers.9.self_attn.v_proj.weight': 2485.32373046875, 'model.decoder.layers.9.self_attn.v_proj.bias': 10.623359858989716, 'model.decoder.layers.9.self_attn.q_proj.weight': 21.537672996520996, 'model.decoder.layers.9.self_attn.q_proj.bias': 0.08984518051147461, 'model.decoder.layers.9.self_attn.out_proj.weight': 35.7772159576416, 'model.decoder.layers.9.self_attn.out_proj.bias': 26.13362693786621, 'model.decoder.layers.9.self_attn_layer_norm.weight': 0.6897743046283722, 'model.decoder.layers.9.self_attn_layer_norm.bias': 0.621272087097168, 'model.decoder.layers.9.fc1.weight': 292.734130859375, 'model.decoder.layers.9.fc1.bias': 0.6253710985183716, 'model.decoder.layers.9.fc2.weight': 110.2846326828003, 'model.decoder.layers.9.fc2.bias': 13.781872510910034, 'model.decoder.layers.9.final_layer_norm.weight': 2.9861000776290894, 'model.decoder.layers.9.final_layer_norm.bias': 0.8339740037918091, 'model.decoder.layers.10.self_attn.k_proj.weight': 22.933305263519287, 'model.decoder.layers.10.self_attn.k_proj.bias': 3.0473688639176544e-07, 'model.decoder.layers.10.self_attn.v_proj.weight': 3388.3648681640625, 'model.decoder.layers.10.self_attn.v_proj.bias': 14.894902229309082, 'model.decoder.layers.10.self_attn.q_proj.weight': 34.66013813018799, 'model.decoder.layers.10.self_attn.q_proj.bias': 0.9107618629932404, 'model.decoder.layers.10.self_attn.out_proj.weight': 81.9998550415039, 'model.decoder.layers.10.self_attn.out_proj.bias': 24.095144271850586, 'model.decoder.layers.10.self_attn_layer_norm.weight': 0.8858213424682617, 'model.decoder.layers.10.self_attn_layer_norm.bias': 14.446499824523926, 'model.decoder.layers.10.fc1.weight': 546.4695587158203, 'model.decoder.layers.10.fc1.bias': 0.9877808392047882, 'model.decoder.layers.10.fc2.weight': 145.4341049194336, 'model.decoder.layers.10.fc2.bias': 24.087068557739258, 'model.decoder.layers.10.final_layer_norm.weight': 1.4037067890167236, 'model.decoder.layers.10.final_layer_norm.bias': 0.7357192635536194, 
'model.decoder.layers.11.self_attn.k_proj.weight': 71.24462985992432, 'model.decoder.layers.11.self_attn.k_proj.bias': 1.970353764590982e-07, 'model.decoder.layers.11.self_attn.v_proj.weight': 4247.576904296875, 'model.decoder.layers.11.self_attn.v_proj.bias': 6.607962608337402, 'model.decoder.layers.11.self_attn.q_proj.weight': 31.36197853088379, 'model.decoder.layers.11.self_attn.q_proj.bias': 1.0284015536308289, 'model.decoder.layers.11.self_attn.out_proj.weight': 95.21738243103027, 'model.decoder.layers.11.self_attn.out_proj.bias': 7.107028961181641, 'model.decoder.layers.11.self_attn_layer_norm.weight': 2.3355406522750854, 'model.decoder.layers.11.self_attn_layer_norm.bias': 4.544022440910339, 'model.decoder.layers.11.fc1.weight': 641.5780639648438, 'model.decoder.layers.11.fc1.bias': 3.2487504482269287, 'model.decoder.layers.11.fc2.weight': 195.2799835205078, 'model.decoder.layers.11.fc2.bias': 5.705599308013916, 'model.decoder.layers.11.final_layer_norm.weight': 0.7201624810695648, 'model.decoder.layers.11.final_layer_norm.bias': 0.8963989168405533, 'model.decoder.layers.12.self_attn.k_proj.weight': 61.37257957458496, 'model.decoder.layers.12.self_attn.k_proj.bias': 7.525343903580506e-08, 'model.decoder.layers.12.self_attn.v_proj.weight': 2591.563232421875, 'model.decoder.layers.12.self_attn.v_proj.bias': 2.675809860229492, 'model.decoder.layers.12.self_attn.q_proj.weight': 17.509002685546875, 'model.decoder.layers.12.self_attn.q_proj.bias': 1.8172758221626282, 'model.decoder.layers.12.self_attn.out_proj.weight': 188.48434448242188, 'model.decoder.layers.12.self_attn.out_proj.bias': 13.077558279037476, 'model.decoder.layers.12.self_attn_layer_norm.weight': 4.3261120319366455, 'model.decoder.layers.12.self_attn_layer_norm.bias': 7.927307605743408, 'model.decoder.layers.12.fc1.weight': 502.13922119140625, 'model.decoder.layers.12.fc1.bias': 1.6733602583408356, 'model.decoder.layers.12.fc2.weight': 142.26613807678223, 'model.decoder.layers.12.fc2.bias': 
23.42030382156372, 'model.decoder.layers.12.final_layer_norm.weight': 0.5001697540283203, 'model.decoder.layers.12.final_layer_norm.bias': 0.7924132943153381, 'model.decoder.layers.13.self_attn.k_proj.weight': 34.4937047958374, 'model.decoder.layers.13.self_attn.k_proj.bias': 2.2195941085101367e-07, 'model.decoder.layers.13.self_attn.v_proj.weight': 2302.90625, 'model.decoder.layers.13.self_attn.v_proj.bias': 8.033044338226318, 'model.decoder.layers.13.self_attn.q_proj.weight': 52.1029257774353, 'model.decoder.layers.13.self_attn.q_proj.bias': 0.3132213354110718, 'model.decoder.layers.13.self_attn.out_proj.weight': 247.14617919921875, 'model.decoder.layers.13.self_attn.out_proj.bias': 19.530856609344482, 'model.decoder.layers.13.self_attn_layer_norm.weight': 1.1585050821304321, 'model.decoder.layers.13.self_attn_layer_norm.bias': 3.622090220451355, 'model.decoder.layers.13.fc1.weight': 614.5724182128906, 'model.decoder.layers.13.fc1.bias': 2.8091284036636353, 'model.decoder.layers.13.fc2.weight': 258.2345504760742, 'model.decoder.layers.13.fc2.bias': 10.21643352508545, 'model.decoder.layers.13.final_layer_norm.weight': 0.641124427318573, 'model.decoder.layers.13.final_layer_norm.bias': 1.3039615154266357, 'model.decoder.layers.14.self_attn.k_proj.weight': 41.02567100524902, 'model.decoder.layers.14.self_attn.k_proj.bias': 2.6874189984482655e-07, 'model.decoder.layers.14.self_attn.v_proj.weight': 2003.8755493164062, 'model.decoder.layers.14.self_attn.v_proj.bias': 2.8769102096557617, 'model.decoder.layers.14.self_attn.q_proj.weight': 64.850830078125, 'model.decoder.layers.14.self_attn.q_proj.bias': 1.4613530337810516, 'model.decoder.layers.14.self_attn.out_proj.weight': 156.12501525878906, 'model.decoder.layers.14.self_attn.out_proj.bias': 0.8622517585754395, 'model.decoder.layers.14.self_attn_layer_norm.weight': 0.7953442335128784, 'model.decoder.layers.14.self_attn_layer_norm.bias': 6.239347457885742, 'model.decoder.layers.14.fc1.weight': 502.0653991699219, 
'model.decoder.layers.14.fc1.bias': 0.23139363527297974, 'model.decoder.layers.14.fc2.weight': 265.2113800048828, 'model.decoder.layers.14.fc2.bias': 6.708292603492737, 'model.decoder.layers.14.final_layer_norm.weight': 0.8758863806724548, 'model.decoder.layers.14.final_layer_norm.bias': 1.0310904383659363, 'model.decoder.layers.15.self_attn.k_proj.weight': 27.863337516784668, 'model.decoder.layers.15.self_attn.k_proj.bias': 1.1906081454071682e-08, 'model.decoder.layers.15.self_attn.v_proj.weight': 1616.340576171875, 'model.decoder.layers.15.self_attn.v_proj.bias': 1.187828540802002, 'model.decoder.layers.15.self_attn.q_proj.weight': 108.25986862182617, 'model.decoder.layers.15.self_attn.q_proj.bias': 0.5772554948925972, 'model.decoder.layers.15.self_attn.out_proj.weight': 177.72638702392578, 'model.decoder.layers.15.self_attn.out_proj.bias': 0.13893771171569824, 'model.decoder.layers.15.self_attn_layer_norm.weight': 1.3950319737195969, 'model.decoder.layers.15.self_attn_layer_norm.bias': 1.9548465013504028, 'model.decoder.layers.15.fc1.weight': 415.725341796875, 'model.decoder.layers.15.fc1.bias': 4.063120901584625, 'model.decoder.layers.15.fc2.weight': 225.14552307128906, 'model.decoder.layers.15.fc2.bias': 0.3618326187133789, 'model.decoder.layers.15.final_layer_norm.weight': 1.5298929512500763, 'model.decoder.layers.15.final_layer_norm.bias': 1.0458898544311523, 'model.decoder.layers.16.self_attn.k_proj.weight': 44.568159103393555, 'model.decoder.layers.16.self_attn.k_proj.bias': 1.8499479637057448e-07, 'model.decoder.layers.16.self_attn.v_proj.weight': 962.0332641601562, 'model.decoder.layers.16.self_attn.v_proj.bias': 5.844789743423462, 'model.decoder.layers.16.self_attn.q_proj.weight': 20.53773546218872, 'model.decoder.layers.16.self_attn.q_proj.bias': 1.0480709671974182, 'model.decoder.layers.16.self_attn.out_proj.weight': 118.24270248413086, 'model.decoder.layers.16.self_attn.out_proj.bias': 2.0651575326919556, 
'model.decoder.layers.16.self_attn_layer_norm.weight': 1.8738468289375305, 'model.decoder.layers.16.self_attn_layer_norm.bias': 1.0223253965377808, 'model.decoder.layers.16.fc1.weight': 318.6868591308594, 'model.decoder.layers.16.fc1.bias': 0.23129820823669434, 'model.decoder.layers.16.fc2.weight': 201.34992218017578, 'model.decoder.layers.16.fc2.bias': 9.756301403045654, 'model.decoder.layers.16.final_layer_norm.weight': 1.714493453502655, 'model.decoder.layers.16.final_layer_norm.bias': 1.111115351319313, 'model.decoder.layers.17.self_attn.k_proj.weight': 29.04533100128174, 'model.decoder.layers.17.self_attn.k_proj.bias': 7.135911062050582e-08, 'model.decoder.layers.17.self_attn.v_proj.weight': 887.9828796386719, 'model.decoder.layers.17.self_attn.v_proj.bias': 0.27517807483673096, 'model.decoder.layers.17.self_attn.q_proj.weight': 20.267151832580566, 'model.decoder.layers.17.self_attn.q_proj.bias': 0.09268084540963173, 'model.decoder.layers.17.self_attn.out_proj.weight': 70.96683311462402, 'model.decoder.layers.17.self_attn.out_proj.bias': 1.5897789001464844, 'model.decoder.layers.17.self_attn_layer_norm.weight': 0.1763298511505127, 'model.decoder.layers.17.self_attn_layer_norm.bias': 1.23454087972641, 'model.decoder.layers.17.fc1.weight': 210.88795471191406, 'model.decoder.layers.17.fc1.bias': 1.7308299541473389, 'model.decoder.layers.17.fc2.weight': 186.00568389892578, 'model.decoder.layers.17.fc2.bias': 4.817761540412903, 'model.decoder.layers.17.final_layer_norm.weight': 0.15330547094345093, 'model.decoder.layers.17.final_layer_norm.bias': 0.48350536823272705, 'model.decoder.layers.18.self_attn.k_proj.weight': 46.939762115478516, 'model.decoder.layers.18.self_attn.k_proj.bias': 1.59923388309835e-07, 'model.decoder.layers.18.self_attn.v_proj.weight': 630.8662719726562, 'model.decoder.layers.18.self_attn.v_proj.bias': 4.892416477203369, 'model.decoder.layers.18.self_attn.q_proj.weight': 51.74759101867676, 'model.decoder.layers.18.self_attn.q_proj.bias': 
0.22355514019727707, 'model.decoder.layers.18.self_attn.out_proj.weight': 81.67732620239258, 'model.decoder.layers.18.self_attn.out_proj.bias': 1.4505280256271362, 'model.decoder.layers.18.self_attn_layer_norm.weight': 0.2590726763010025, 'model.decoder.layers.18.self_attn_layer_norm.bias': 0.3910653591156006, 'model.decoder.layers.18.fc1.weight': 297.2482452392578, 'model.decoder.layers.18.fc1.bias': 0.5074416100978851, 'model.decoder.layers.18.fc2.weight': 119.08955001831055, 'model.decoder.layers.18.fc2.bias': 1.786450207233429, 'model.decoder.layers.18.final_layer_norm.weight': 0.5543007850646973, 'model.decoder.layers.18.final_layer_norm.bias': 0.15881741046905518, 'model.decoder.layers.19.self_attn.k_proj.weight': 20.938846588134766, 'model.decoder.layers.19.self_attn.k_proj.bias': 1.1113691300579376e-07, 'model.decoder.layers.19.self_attn.v_proj.weight': 577.3338317871094, 'model.decoder.layers.19.self_attn.v_proj.bias': 2.2570200860500336, 'model.decoder.layers.19.self_attn.q_proj.weight': 35.30374526977539, 'model.decoder.layers.19.self_attn.q_proj.bias': 0.27843809127807617, 'model.decoder.layers.19.self_attn.out_proj.weight': 57.634511947631836, 'model.decoder.layers.19.self_attn.out_proj.bias': 0.968856155872345, 'model.decoder.layers.19.self_attn_layer_norm.weight': 0.3715187907218933, 'model.decoder.layers.19.self_attn_layer_norm.bias': 4.551464796066284, 'model.decoder.layers.19.fc1.weight': 160.53913497924805, 'model.decoder.layers.19.fc1.bias': 0.5869311690330505, 'model.decoder.layers.19.fc2.weight': 143.04584503173828, 'model.decoder.layers.19.fc2.bias': 2.516521692276001, 'model.decoder.layers.19.final_layer_norm.weight': 0.25180961191654205, 'model.decoder.layers.19.final_layer_norm.bias': 0.15598544478416443, 'model.decoder.layers.20.self_attn.k_proj.weight': 15.44698429107666, 'model.decoder.layers.20.self_attn.k_proj.bias': 8.140511909004999e-08, 'model.decoder.layers.20.self_attn.v_proj.weight': 358.2361145019531, 
'model.decoder.layers.20.self_attn.v_proj.bias': 2.2781952247023582, 'model.decoder.layers.20.self_attn.q_proj.weight': 28.796974182128906, 'model.decoder.layers.20.self_attn.q_proj.bias': 0.16163374483585358, 'model.decoder.layers.20.self_attn.out_proj.weight': 36.0947208404541, 'model.decoder.layers.20.self_attn.out_proj.bias': 2.2072792649269104, 'model.decoder.layers.20.self_attn_layer_norm.weight': 0.9482774138450623, 'model.decoder.layers.20.self_attn_layer_norm.bias': 2.535506010055542, 'model.decoder.layers.20.fc1.weight': 189.20856475830078, 'model.decoder.layers.20.fc1.bias': 0.042084306478500366, 'model.decoder.layers.20.fc2.weight': 122.6939582824707, 'model.decoder.layers.20.fc2.bias': 0.5059230476617813, 'model.decoder.layers.20.final_layer_norm.weight': 0.5442333519458771, 'model.decoder.layers.20.final_layer_norm.bias': 0.846729189157486, 'model.decoder.layers.21.self_attn.k_proj.weight': 31.312291145324707, 'model.decoder.layers.21.self_attn.k_proj.bias': 1.591198355299639e-07, 'model.decoder.layers.21.self_attn.v_proj.weight': 365.84857177734375, 'model.decoder.layers.21.self_attn.v_proj.bias': 0.8699764609336853, 'model.decoder.layers.21.self_attn.q_proj.weight': 12.0049889087677, 'model.decoder.layers.21.self_attn.q_proj.bias': 0.5751035362482071, 'model.decoder.layers.21.self_attn.out_proj.weight': 12.860353469848633, 'model.decoder.layers.21.self_attn.out_proj.bias': 0.7693727016448975, 'model.decoder.layers.21.self_attn_layer_norm.weight': 0.5715694278478622, 'model.decoder.layers.21.self_attn_layer_norm.bias': 0.8064265251159668, 'model.decoder.layers.21.fc1.weight': 216.21472930908203, 'model.decoder.layers.21.fc1.bias': 0.585265651345253, 'model.decoder.layers.21.fc2.weight': 125.50208282470703, 'model.decoder.layers.21.fc2.bias': 0.16725388169288635, 'model.decoder.layers.21.final_layer_norm.weight': 0.13870008289813995, 'model.decoder.layers.21.final_layer_norm.bias': 0.3126177042722702, 'model.decoder.layers.22.self_attn.k_proj.weight': 
38.888593673706055, 'model.decoder.layers.22.self_attn.k_proj.bias': 3.7293105492608447e-07, 'model.decoder.layers.22.self_attn.v_proj.weight': 292.6825866699219, 'model.decoder.layers.22.self_attn.v_proj.bias': 1.3705345392227173, 'model.decoder.layers.22.self_attn.q_proj.weight': 17.21405792236328, 'model.decoder.layers.22.self_attn.q_proj.bias': 0.3539115712046623, 'model.decoder.layers.22.self_attn.out_proj.weight': 28.420196533203125, 'model.decoder.layers.22.self_attn.out_proj.bias': 0.46205945312976837, 'model.decoder.layers.22.self_attn_layer_norm.weight': 3.4326250553131104, 'model.decoder.layers.22.self_attn_layer_norm.bias': 0.827709436416626, 'model.decoder.layers.22.fc1.weight': 178.62200927734375, 'model.decoder.layers.22.fc1.bias': 0.18552535772323608, 'model.decoder.layers.22.fc2.weight': 123.97221374511719, 'model.decoder.layers.22.fc2.bias': 0.8516284823417664, 'model.decoder.layers.22.final_layer_norm.weight': 0.5998569130897522, 'model.decoder.layers.22.final_layer_norm.bias': 0.008657440543174744, 'model.decoder.layers.23.self_attn.k_proj.weight': 863.5398254394531, 'model.decoder.layers.23.self_attn.k_proj.bias': 0.00016913098806980997, 'model.decoder.layers.23.self_attn.v_proj.weight': 184.3167495727539, 'model.decoder.layers.23.self_attn.v_proj.bias': 0.8771508932113647, 'model.decoder.layers.23.self_attn.q_proj.weight': 2.100632667541504, 'model.decoder.layers.23.self_attn.q_proj.bias': 0.2935800701379776, 'model.decoder.layers.23.self_attn.out_proj.weight': 24.482698440551758, 'model.decoder.layers.23.self_attn.out_proj.bias': 0.06467963755130768, 'model.decoder.layers.23.self_attn_layer_norm.weight': 20.46866226196289, 'model.decoder.layers.23.self_attn_layer_norm.bias': 0.530587375164032, 'model.decoder.layers.23.fc1.weight': 390.59022521972656, 'model.decoder.layers.23.fc1.bias': 0.38220076262950897, 'model.decoder.layers.23.fc2.weight': 104.04845428466797, 'model.decoder.layers.23.fc2.bias': 0.49092406034469604, 
'model.decoder.layers.23.final_layer_norm.weight': 2.4837400913238525, 'model.decoder.layers.23.final_layer_norm.bias': 0.9877657294273376}\n",
    "# Group the Hessian traces of the weight tensors by decoder layer.\n",
    "#   sensitivity[i] : {sub-module path without '.weight' -> trace} for layer i\n",
    "#   total_weight   : every collected trace, sorted ascending (used for ranking)\n",
    "NUM_LAYERS = 24  # hessian_trace above holds decoder layers 0..23\n",
    "sensitivity = [{} for _ in range(NUM_LAYERS)]\n",
    "total_weight = []\n",
    "for name, trace in hessian_trace.items():\n",
    "    if not name.startswith(\"model.decoder.layers\"):\n",
    "        continue  # embeddings / model-level final layer norm are not per-layer entries\n",
    "    parts = name.split('.')\n",
    "    layer = int(parts[3])\n",
    "    # Skip anything outside the expected layer range ('or', not 'and':\n",
    "    # a layer index cannot be both negative and >= NUM_LAYERS).\n",
    "    if layer < 0 or layer >= NUM_LAYERS:\n",
    "        continue\n",
    "    subname = \".\".join(parts[4:])\n",
    "    # Biases are ignored; only '.weight' tensors are recorded.\n",
    "    if subname.endswith(\".weight\"):\n",
    "        # Index by layer number directly so the result does not depend on\n",
    "        # the iteration order of hessian_trace.\n",
    "        sensitivity[layer][subname[:-len(\".weight\")]] = trace\n",
    "        total_weight.append(trace)\n",
    "total_weight.sort()\n",
    "print(total_weight, len(total_weight))\n",
    "def get_sparsity(layer, name):\n",
    "    id = bisect.bisect_left(total_weight, sensitivity[layer][name]) \n",
    "    lower_bound = 0.4\n",
    "    upper_bound = 1 - lower_bound\n",
    "    sen = lower_bound + id * (upper_bound - lower_bound) / (len(total_weight) - 1 )\n",
    "    return 1 - sen \n",
    "# Uncomment to inspect the per-layer tables built above:\n",
    "# print(sensitivity[0])\n",
    "# print(sensitivity, len(sensitivity))\n",
    "# Sparsity for the value projection of decoder layer 11.\n",
    "print(get_sparsity(layer=11, name=\"self_attn.v_proj\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0e52f769-42d6-46b1-90c2-f7e88711d7df",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[378.00964403152466, 616.639817237854, 1063.4479117393494, 426.1132535934448, 182.27921056747437, 470.5630054473877, 801.6492919921875, 428.8081798553467, 335.6481399536133, 368.4092251062393, 367.00214672088623, 366.6487159729004, 588.1577467918396, 259.76174652576447, 223.59011363983154, 171.11340653896332, 530.1361274719238, 381.5153684616089, 293.11809039115906, 267.87487959861755, 159.5926011800766, 201.7571828365326, 136.93207877874374, 288.2714184373617, 152.00616973638535, 146.31018382310867, 138.51441717147827, 125.70186698436737, 180.24136781692505, 237.80374109745026, 326.86368000507355, 524.8085265159607]\n"
     ]
    }
   ],
   "source": [
    "## LLaMA-7B layer-level\n",
    "hessian_trace = {'model.embed_tokens.weight': 128.11712646484375, 'model.layers.0.self_attn.q_proj.weight': 0.9730644226074219, 'model.layers.0.self_attn.k_proj.weight': 5.75832986831665, 'model.layers.0.self_attn.v_proj.weight': 267.82562255859375, 'model.layers.0.self_attn.o_proj.weight': 6.358226776123047, 'model.layers.0.mlp.gate_proj.weight': 9.388677597045898, 'model.layers.0.mlp.up_proj.weight': 5.720170497894287, 'model.layers.0.mlp.down_proj.weight': 69.15843200683594, 'model.layers.0.input_layernorm.weight': 6.631749629974365, 'model.layers.0.post_attention_layernorm.weight': 6.195370674133301, 'model.layers.1.self_attn.q_proj.weight': 32.61027526855469, 'model.layers.1.self_attn.k_proj.weight': 3.087127685546875, 'model.layers.1.self_attn.v_proj.weight': 230.0555419921875, 'model.layers.1.self_attn.o_proj.weight': 62.52192687988281, 'model.layers.1.mlp.gate_proj.weight': 38.94524383544922, 'model.layers.1.mlp.up_proj.weight': 35.18130874633789, 'model.layers.1.mlp.down_proj.weight': 199.8224639892578, 'model.layers.1.input_layernorm.weight': 11.42271614074707, 'model.layers.1.post_attention_layernorm.weight': 2.9932126998901367, 'model.layers.2.self_attn.q_proj.weight': 13.063102722167969, 'model.layers.2.self_attn.k_proj.weight': 3.46165132522583, 'model.layers.2.self_attn.v_proj.weight': 302.54681396484375, 'model.layers.2.self_attn.o_proj.weight': 172.65652465820312, 'model.layers.2.mlp.gate_proj.weight': 80.73773956298828, 'model.layers.2.mlp.up_proj.weight': 62.57463073730469, 'model.layers.2.mlp.down_proj.weight': 380, 'model.layers.2.input_layernorm.weight': 1.6050081253051758, 'model.layers.2.post_attention_layernorm.weight': 46.80244064331055, 'model.layers.3.self_attn.q_proj.weight': 6.005413055419922, 'model.layers.3.self_attn.k_proj.weight': 12.287504196166992, 'model.layers.3.self_attn.v_proj.weight': 126.36164855957031, 'model.layers.3.self_attn.o_proj.weight': 58.28596878051758, 'model.layers.3.mlp.gate_proj.weight': 
47.557952880859375, 'model.layers.3.mlp.up_proj.weight': 115.69792938232422, 'model.layers.3.mlp.down_proj.weight': 31.15878677368164, 'model.layers.3.input_layernorm.weight': 15.918931007385254, 'model.layers.3.post_attention_layernorm.weight': 12.839118957519531, 'model.layers.4.self_attn.q_proj.weight': 4.627760410308838, 'model.layers.4.self_attn.k_proj.weight': 18.468809127807617, 'model.layers.4.self_attn.v_proj.weight': 26.223430633544922, 'model.layers.4.self_attn.o_proj.weight': 30.891420364379883, 'model.layers.4.mlp.gate_proj.weight': 23.2423095703125, 'model.layers.4.mlp.up_proj.weight': 18.635950088500977, 'model.layers.4.mlp.down_proj.weight': 28.47480010986328, 'model.layers.4.input_layernorm.weight': 19.696210861206055, 'model.layers.4.post_attention_layernorm.weight': 12.018519401550293, 'model.layers.5.self_attn.q_proj.weight': 14.223091125488281, 'model.layers.5.self_attn.k_proj.weight': 3.231780529022217, 'model.layers.5.self_attn.v_proj.weight': 76.36811065673828, 'model.layers.5.self_attn.o_proj.weight': 44.5665397644043, 'model.layers.5.mlp.gate_proj.weight': 6.113771915435791, 'model.layers.5.mlp.up_proj.weight': 199.17459106445312, 'model.layers.5.mlp.down_proj.weight': 82.71095275878906, 'model.layers.5.input_layernorm.weight': 35.76387023925781, 'model.layers.5.post_attention_layernorm.weight': 8.410297393798828, 'model.layers.6.self_attn.q_proj.weight': 112.5117416381836, 'model.layers.6.self_attn.k_proj.weight': 20.700550079345703, 'model.layers.6.self_attn.v_proj.weight': 198.06227111816406, 'model.layers.6.self_attn.o_proj.weight': 178.03009033203125, 'model.layers.6.mlp.gate_proj.weight': 101.31014251708984, 'model.layers.6.mlp.up_proj.weight': 88.3492431640625, 'model.layers.6.mlp.down_proj.weight': 64.1391372680664, 'model.layers.6.input_layernorm.weight': 28.630756378173828, 'model.layers.6.post_attention_layernorm.weight': 9.915359497070312, 'model.layers.7.self_attn.q_proj.weight': 33.872005462646484, 
'model.layers.7.self_attn.k_proj.weight': 26.917009353637695, 'model.layers.7.self_attn.v_proj.weight': 173.34609985351562, 'model.layers.7.self_attn.o_proj.weight': 15.343713760375977, 'model.layers.7.mlp.gate_proj.weight': 46.84624481201172, 'model.layers.7.mlp.up_proj.weight': 7.409170150756836, 'model.layers.7.mlp.down_proj.weight': 90.63573455810547, 'model.layers.7.input_layernorm.weight': 16.19373321533203, 'model.layers.7.post_attention_layernorm.weight': 18.244468688964844, 'model.layers.8.self_attn.q_proj.weight': 36.83240509033203, 'model.layers.8.self_attn.k_proj.weight': 12.957653999328613, 'model.layers.8.self_attn.v_proj.weight': 25.95500373840332, 'model.layers.8.self_attn.o_proj.weight': 42.89203643798828, 'model.layers.8.mlp.gate_proj.weight': 23.363861083984375, 'model.layers.8.mlp.up_proj.weight': 13.634514808654785, 'model.layers.8.mlp.down_proj.weight': 128.12017822265625, 'model.layers.8.input_layernorm.weight': 23.67529296875, 'model.layers.8.post_attention_layernorm.weight': 28.217193603515625, 'model.layers.9.self_attn.q_proj.weight': 40.68372344970703, 'model.layers.9.self_attn.k_proj.weight': 20.93582534790039, 'model.layers.9.self_attn.v_proj.weight': 20.43726348876953, 'model.layers.9.self_attn.o_proj.weight': 2.8239784240722656, 'model.layers.9.mlp.gate_proj.weight': 35.94215393066406, 'model.layers.9.mlp.up_proj.weight': 64.20850372314453, 'model.layers.9.mlp.down_proj.weight': 157.94346618652344, 'model.layers.9.input_layernorm.weight': 0.5496364831924438, 'model.layers.9.post_attention_layernorm.weight': 24.884674072265625, 'model.layers.10.self_attn.q_proj.weight': 15.310616493225098, 'model.layers.10.self_attn.k_proj.weight': 47.65492630004883, 'model.layers.10.self_attn.v_proj.weight': 62.41743087768555, 'model.layers.10.self_attn.o_proj.weight': 94.44921875, 'model.layers.10.mlp.gate_proj.weight': 0.06731986999511719, 'model.layers.10.mlp.up_proj.weight': 72.94913482666016, 'model.layers.10.mlp.down_proj.weight': 
45.771446228027344, 'model.layers.10.input_layernorm.weight': 19.24749755859375, 'model.layers.10.post_attention_layernorm.weight': 9.13455581665039, 'model.layers.11.self_attn.q_proj.weight': 26.566925048828125, 'model.layers.11.self_attn.k_proj.weight': 15.202154159545898, 'model.layers.11.self_attn.v_proj.weight': 64.13490295410156, 'model.layers.11.self_attn.o_proj.weight': 66.1649169921875, 'model.layers.11.mlp.gate_proj.weight': 38.161041259765625, 'model.layers.11.mlp.up_proj.weight': 144.74136352539062, 'model.layers.11.mlp.down_proj.weight': 4.287397384643555, 'model.layers.11.input_layernorm.weight': 5.189544677734375, 'model.layers.11.post_attention_layernorm.weight': 2.200469970703125, 'model.layers.12.self_attn.q_proj.weight': 1.3900446891784668, 'model.layers.12.self_attn.k_proj.weight': 24.41895866394043, 'model.layers.12.self_attn.v_proj.weight': 195.2510528564453, 'model.layers.12.self_attn.o_proj.weight': 118.57347106933594, 'model.layers.12.mlp.gate_proj.weight': 44.17179489135742, 'model.layers.12.mlp.up_proj.weight': 50.411373138427734, 'model.layers.12.mlp.down_proj.weight': 126.22769165039062, 'model.layers.12.input_layernorm.weight': 19.60637664794922, 'model.layers.12.post_attention_layernorm.weight': 8.106983184814453, 'model.layers.13.self_attn.q_proj.weight': 48.121925354003906, 'model.layers.13.self_attn.k_proj.weight': 1.2666702270507812, 'model.layers.13.self_attn.v_proj.weight': 38.51007843017578, 'model.layers.13.self_attn.o_proj.weight': 19.222043991088867, 'model.layers.13.mlp.gate_proj.weight': 22.64798355102539, 'model.layers.13.mlp.up_proj.weight': 33.717071533203125, 'model.layers.13.mlp.down_proj.weight': 94.04187774658203, 'model.layers.13.input_layernorm.weight': 1.0167909860610962, 'model.layers.13.post_attention_layernorm.weight': 1.2173047065734863, 'model.layers.14.self_attn.q_proj.weight': 5.421915054321289, 'model.layers.14.self_attn.k_proj.weight': 27.10348892211914, 'model.layers.14.self_attn.v_proj.weight': 
91.35397338867188, 'model.layers.14.self_attn.o_proj.weight': 33.31367874145508, 'model.layers.14.mlp.gate_proj.weight': 28.608022689819336, 'model.layers.14.mlp.up_proj.weight': 31.337360382080078, 'model.layers.14.mlp.down_proj.weight': 5.009794235229492, 'model.layers.14.input_layernorm.weight': 0.9636175632476807, 'model.layers.14.post_attention_layernorm.weight': 0.47826266288757324, 'model.layers.15.self_attn.q_proj.weight': 9.08072566986084, 'model.layers.15.self_attn.k_proj.weight': 20.989229202270508, 'model.layers.15.self_attn.v_proj.weight': 57.11623001098633, 'model.layers.15.self_attn.o_proj.weight': 5.311587333679199, 'model.layers.15.mlp.gate_proj.weight': 19.28093719482422, 'model.layers.15.mlp.up_proj.weight': 2.2559266090393066, 'model.layers.15.mlp.down_proj.weight': 47.119606018066406, 'model.layers.15.input_layernorm.weight': 9.245084762573242, 'model.layers.15.post_attention_layernorm.weight': 0.714079737663269, 'model.layers.16.self_attn.q_proj.weight': 25.530439376831055, 'model.layers.16.self_attn.k_proj.weight': 16.02246856689453, 'model.layers.16.self_attn.v_proj.weight': 124.31573486328125, 'model.layers.16.self_attn.o_proj.weight': 65.66757202148438, 'model.layers.16.mlp.gate_proj.weight': 89.2286376953125, 'model.layers.16.mlp.up_proj.weight': 78.2838363647461, 'model.layers.16.mlp.down_proj.weight': 122.17619323730469, 'model.layers.16.input_layernorm.weight': 3.9033946990966797, 'model.layers.16.post_attention_layernorm.weight': 5.007850646972656, 'model.layers.17.self_attn.q_proj.weight': 30.791297912597656, 'model.layers.17.self_attn.k_proj.weight': 6.001221656799316, 'model.layers.17.self_attn.v_proj.weight': 50.58659362792969, 'model.layers.17.self_attn.o_proj.weight': 65.87053680419922, 'model.layers.17.mlp.gate_proj.weight': 37.348777770996094, 'model.layers.17.mlp.up_proj.weight': 37.3125114440918, 'model.layers.17.mlp.down_proj.weight': 135.64047241210938, 'model.layers.17.input_layernorm.weight': 5.8337860107421875, 
'model.layers.17.post_attention_layernorm.weight': 12.130170822143555, 'model.layers.18.self_attn.q_proj.weight': 6.057760715484619, 'model.layers.18.self_attn.k_proj.weight': 8.957040786743164, 'model.layers.18.self_attn.v_proj.weight': 90.65309143066406, 'model.layers.18.self_attn.o_proj.weight': 49.07258987426758, 'model.layers.18.mlp.gate_proj.weight': 53.93105697631836, 'model.layers.18.mlp.up_proj.weight': 46.59817123413086, 'model.layers.18.mlp.down_proj.weight': 32.15882873535156, 'model.layers.18.input_layernorm.weight': 1.3362815380096436, 'model.layers.18.post_attention_layernorm.weight': 4.353269100189209, 'model.layers.19.self_attn.q_proj.weight': 0.4053499698638916, 'model.layers.19.self_attn.k_proj.weight': 25.439279556274414, 'model.layers.19.self_attn.v_proj.weight': 62.09384536743164, 'model.layers.19.self_attn.o_proj.weight': 59.380409240722656, 'model.layers.19.mlp.gate_proj.weight': 66.84228515625, 'model.layers.19.mlp.up_proj.weight': 42.51271438598633, 'model.layers.19.mlp.down_proj.weight': 0.2137746810913086, 'model.layers.19.input_layernorm.weight': 5.700724124908447, 'model.layers.19.post_attention_layernorm.weight': 5.286497116088867, 'model.layers.20.self_attn.q_proj.weight': 5.815807819366455, 'model.layers.20.self_attn.k_proj.weight': 3.2934980392456055, 'model.layers.20.self_attn.v_proj.weight': 30.229206085205078, 'model.layers.20.self_attn.o_proj.weight': 23.19651222229004, 'model.layers.20.mlp.gate_proj.weight': 6.808246612548828, 'model.layers.20.mlp.up_proj.weight': 30.613590240478516, 'model.layers.20.mlp.down_proj.weight': 54.0933723449707, 'model.layers.20.input_layernorm.weight': 1.9205161333084106, 'model.layers.20.post_attention_layernorm.weight': 3.621851682662964, 'model.layers.21.self_attn.q_proj.weight': 1.084111213684082, 'model.layers.21.self_attn.k_proj.weight': 0.011959314346313477, 'model.layers.21.self_attn.v_proj.weight': 19.965911865234375, 'model.layers.21.self_attn.o_proj.weight': 4.297876358032227, 
'model.layers.21.mlp.gate_proj.weight': 32.929927825927734, 'model.layers.21.mlp.up_proj.weight': 38.852149963378906, 'model.layers.21.mlp.down_proj.weight': 102.38787841796875, 'model.layers.21.input_layernorm.weight': 1.2508561611175537, 'model.layers.21.post_attention_layernorm.weight': 0.9765117168426514, 'model.layers.22.self_attn.q_proj.weight': 12.39744758605957, 'model.layers.22.self_attn.k_proj.weight': 27.451740264892578, 'model.layers.22.self_attn.v_proj.weight': 11.398211479187012, 'model.layers.22.self_attn.o_proj.weight': 11.232147216796875, 'model.layers.22.mlp.gate_proj.weight': 4.822690963745117, 'model.layers.22.mlp.up_proj.weight': 13.463789939880371, 'model.layers.22.mlp.down_proj.weight': 52.949893951416016, 'model.layers.22.input_layernorm.weight': 0.16745203733444214, 'model.layers.22.post_attention_layernorm.weight': 3.0487053394317627, 'model.layers.23.self_attn.q_proj.weight': 10.201648712158203, 'model.layers.23.self_attn.k_proj.weight': 0.39299440383911133, 'model.layers.23.self_attn.v_proj.weight': 37.769798278808594, 'model.layers.23.self_attn.o_proj.weight': 66.3258056640625, 'model.layers.23.mlp.gate_proj.weight': 84.50884246826172, 'model.layers.23.mlp.up_proj.weight': 7.277076721191406, 'model.layers.23.mlp.down_proj.weight': 77.68579864501953, 'model.layers.23.input_layernorm.weight': 3.9064760208129883, 'model.layers.23.post_attention_layernorm.weight': 0.2029775232076645, 'model.layers.24.self_attn.q_proj.weight': 12.511200904846191, 'model.layers.24.self_attn.k_proj.weight': 5.5696563720703125, 'model.layers.24.self_attn.v_proj.weight': 15.777772903442383, 'model.layers.24.self_attn.o_proj.weight': 18.261016845703125, 'model.layers.24.mlp.gate_proj.weight': 22.595224380493164, 'model.layers.24.mlp.up_proj.weight': 38.35172653198242, 'model.layers.24.mlp.down_proj.weight': 32.59394073486328, 'model.layers.24.input_layernorm.weight': 0.5317487120628357, 'model.layers.24.post_attention_layernorm.weight': 5.813882350921631, 
'model.layers.25.self_attn.q_proj.weight': 0.7791382670402527, 'model.layers.25.self_attn.k_proj.weight': 10.64183235168457, 'model.layers.25.self_attn.v_proj.weight': 27.06773567199707, 'model.layers.25.self_attn.o_proj.weight': 2.677760362625122, 'model.layers.25.mlp.gate_proj.weight': 20.254562377929688, 'model.layers.25.mlp.up_proj.weight': 39.62143325805664, 'model.layers.25.mlp.down_proj.weight': 42.814517974853516, 'model.layers.25.input_layernorm.weight': 1.402695655822754, 'model.layers.25.post_attention_layernorm.weight': 1.05050790309906, 'model.layers.26.self_attn.q_proj.weight': 18.389787673950195, 'model.layers.26.self_attn.k_proj.weight': 1.519516944885254, 'model.layers.26.self_attn.v_proj.weight': 53.071800231933594, 'model.layers.26.self_attn.o_proj.weight': 5.7049736976623535, 'model.layers.26.mlp.gate_proj.weight': 24.868253707885742, 'model.layers.26.mlp.up_proj.weight': 20.149593353271484, 'model.layers.26.mlp.down_proj.weight': 9.35295295715332, 'model.layers.26.input_layernorm.weight': 1.8879778385162354, 'model.layers.26.post_attention_layernorm.weight': 3.5695607662200928, 'model.layers.27.self_attn.q_proj.weight': 15.680398941040039, 'model.layers.27.self_attn.k_proj.weight': 14.111237525939941, 'model.layers.27.self_attn.v_proj.weight': 3.434666156768799, 'model.layers.27.self_attn.o_proj.weight': 10.889530181884766, 'model.layers.27.mlp.gate_proj.weight': 19.022090911865234, 'model.layers.27.mlp.up_proj.weight': 13.062423706054688, 'model.layers.27.mlp.down_proj.weight': 38.12520980834961, 'model.layers.27.input_layernorm.weight': 0.24941480159759521, 'model.layers.27.post_attention_layernorm.weight': 11.1268949508667, 'model.layers.28.self_attn.q_proj.weight': 18.237247467041016, 'model.layers.28.self_attn.k_proj.weight': 1.6528589725494385, 'model.layers.28.self_attn.v_proj.weight': 34.114871978759766, 'model.layers.28.self_attn.o_proj.weight': 14.817950248718262, 'model.layers.28.mlp.gate_proj.weight': 2.655719757080078, 
'model.layers.28.mlp.up_proj.weight': 83.83953094482422, 'model.layers.28.mlp.down_proj.weight': 18.02721405029297, 'model.layers.28.input_layernorm.weight': 2.8059041500091553, 'model.layers.28.post_attention_layernorm.weight': 4.0900702476501465, 'model.layers.29.self_attn.q_proj.weight': 6.655066967010498, 'model.layers.29.self_attn.k_proj.weight': 10.840666770935059, 'model.layers.29.self_attn.v_proj.weight': 4.528952121734619, 'model.layers.29.self_attn.o_proj.weight': 9.182613372802734, 'model.layers.29.mlp.gate_proj.weight': 59.8269157409668, 'model.layers.29.mlp.up_proj.weight': 59.1226806640625, 'model.layers.29.mlp.down_proj.weight': 80.949462890625, 'model.layers.29.input_layernorm.weight': 0.5516930818557739, 'model.layers.29.post_attention_layernorm.weight': 6.145689487457275, 'model.layers.30.self_attn.q_proj.weight': 17.526729583740234, 'model.layers.30.self_attn.k_proj.weight': 9.665191650390625, 'model.layers.30.self_attn.v_proj.weight': 46.41564178466797, 'model.layers.30.self_attn.o_proj.weight': 17.27133560180664, 'model.layers.30.mlp.gate_proj.weight': 106.48152160644531, 'model.layers.30.mlp.up_proj.weight': 62.614585876464844, 'model.layers.30.mlp.down_proj.weight': 27.564010620117188, 'model.layers.30.input_layernorm.weight': 1.5891705751419067, 'model.layers.30.post_attention_layernorm.weight': 37.73549270629883, 'model.layers.31.self_attn.q_proj.weight': 6.382080078125, 'model.layers.31.self_attn.k_proj.weight': 9.235940933227539, 'model.layers.31.self_attn.v_proj.weight': 15.959426879882812, 'model.layers.31.self_attn.o_proj.weight': 20.189496994018555, 'model.layers.31.mlp.gate_proj.weight': 16.908679962158203, 'model.layers.31.mlp.up_proj.weight': 100.0942153930664, 'model.layers.31.mlp.down_proj.weight': 340, 'model.layers.31.input_layernorm.weight': 14.88443374633789, 'model.layers.31.post_attention_layernorm.weight': 1.154252529144287, 'model.norm.weight': 5.090324401855469, 'lm_head.weight': 5700.6513671875}\n",
    "# Sum the per-parameter Hessian traces into one sensitivity score per decoder layer.\n",
    "# The layer index is the third dotted field of the parameter name,\n",
    "# e.g. 'model.layers.17.mlp.up_proj.weight' -> 17.\n",
    "layer_traces = [\n",
    "    (int(name.split('.')[2]), trace)\n",
    "    for name, trace in hessian_trace.items()\n",
    "    if name.startswith('model.layers')  # skips embed_tokens, model.norm and lm_head\n",
    "]\n",
    "# Size the accumulator from the data rather than hard-coding 32, so a trace dict\n",
    "# with a different number of layers neither raises IndexError nor gets zero-padded.\n",
    "num_layers = max((layer for layer, _ in layer_traces), default=-1) + 1\n",
    "sensitivity = [0.0] * num_layers\n",
    "for layer, trace in layer_traces:\n",
    "    sensitivity[layer] += trace\n",
    "print(sensitivity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "603d57d1-82ac-41b0-9703-e216594bfc19",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.011959314346313477, 0.06731986999511719, 0.16745203733444214, 0.2029775232076645, 0.2137746810913086, 0.24941480159759521, 0.39299440383911133, 0.4053499698638916, 0.47826266288757324, 0.5317487120628357, 0.5496364831924438, 0.5516930818557739, 0.714079737663269, 0.7791382670402527, 0.9636175632476807, 0.9730644226074219, 0.9765117168426514, 1.0167909860610962, 1.05050790309906, 1.084111213684082, 1.154252529144287, 1.2173047065734863, 1.2508561611175537, 1.2666702270507812, 1.3362815380096436, 1.3900446891784668, 1.402695655822754, 1.519516944885254, 1.5891705751419067, 1.6050081253051758, 1.6528589725494385, 1.8879778385162354, 1.9205161333084106, 2.200469970703125, 2.2559266090393066, 2.655719757080078, 2.677760362625122, 2.8059041500091553, 2.8239784240722656, 2.9932126998901367, 3.0487053394317627, 3.087127685546875, 3.231780529022217, 3.2934980392456055, 3.434666156768799, 3.46165132522583, 3.5695607662200928, 3.621851682662964, 3.9033946990966797, 3.9064760208129883, 4.0900702476501465, 4.287397384643555, 4.297876358032227, 4.353269100189209, 4.528952121734619, 4.627760410308838, 4.822690963745117, 5.007850646972656, 5.009794235229492, 5.189544677734375, 5.286497116088867, 5.311587333679199, 5.421915054321289, 5.5696563720703125, 5.700724124908447, 5.7049736976623535, 5.720170497894287, 5.75832986831665, 5.813882350921631, 5.815807819366455, 5.8337860107421875, 6.001221656799316, 6.005413055419922, 6.057760715484619, 6.113771915435791, 6.145689487457275, 6.195370674133301, 6.358226776123047, 6.382080078125, 6.631749629974365, 6.655066967010498, 6.808246612548828, 7.277076721191406, 7.409170150756836, 8.106983184814453, 8.410297393798828, 8.957040786743164, 9.08072566986084, 9.13455581665039, 9.182613372802734, 9.235940933227539, 9.245084762573242, 9.35295295715332, 9.388677597045898, 9.665191650390625, 9.915359497070312, 10.201648712158203, 10.64183235168457, 10.840666770935059, 10.889530181884766, 11.1268949508667, 11.232147216796875, 
11.398211479187012, 11.42271614074707, 12.018519401550293, 12.130170822143555, 12.287504196166992, 12.39744758605957, 12.511200904846191, 12.839118957519531, 12.957653999328613, 13.062423706054688, 13.063102722167969, 13.463789939880371, 13.634514808654785, 14.111237525939941, 14.223091125488281, 14.817950248718262, 14.88443374633789, 15.202154159545898, 15.310616493225098, 15.343713760375977, 15.680398941040039, 15.777772903442383, 15.918931007385254, 15.959426879882812, 16.02246856689453, 16.19373321533203, 16.908679962158203, 17.27133560180664, 17.526729583740234, 18.02721405029297, 18.237247467041016, 18.244468688964844, 18.261016845703125, 18.389787673950195, 18.468809127807617, 18.635950088500977, 19.022090911865234, 19.222043991088867, 19.24749755859375, 19.28093719482422, 19.60637664794922, 19.696210861206055, 19.965911865234375, 20.149593353271484, 20.189496994018555, 20.254562377929688, 20.43726348876953, 20.700550079345703, 20.93582534790039, 20.989229202270508, 22.595224380493164, 22.64798355102539, 23.19651222229004, 23.2423095703125, 23.363861083984375, 23.67529296875, 24.41895866394043, 24.868253707885742, 24.884674072265625, 25.439279556274414, 25.530439376831055, 25.95500373840332, 26.223430633544922, 26.566925048828125, 26.917009353637695, 27.06773567199707, 27.10348892211914, 27.451740264892578, 27.564010620117188, 28.217193603515625, 28.47480010986328, 28.608022689819336, 28.630756378173828, 30.229206085205078, 30.613590240478516, 30.791297912597656, 30.891420364379883, 31.15878677368164, 31.337360382080078, 32.15882873535156, 32.59394073486328, 32.61027526855469, 32.929927825927734, 33.31367874145508, 33.717071533203125, 33.872005462646484, 34.114871978759766, 35.18130874633789, 35.76387023925781, 35.94215393066406, 36.83240509033203, 37.3125114440918, 37.348777770996094, 37.73549270629883, 37.769798278808594, 38.12520980834961, 38.161041259765625, 38.35172653198242, 38.51007843017578, 38.852149963378906, 38.94524383544922, 39.62143325805664, 
40.68372344970703, 42.51271438598633, 42.814517974853516, 42.89203643798828, 44.17179489135742, 44.5665397644043, 45.771446228027344, 46.41564178466797, 46.59817123413086, 46.80244064331055, 46.84624481201172, 47.119606018066406, 47.557952880859375, 47.65492630004883, 48.121925354003906, 49.07258987426758, 50.411373138427734, 50.58659362792969, 52.949893951416016, 53.071800231933594, 53.93105697631836, 54.0933723449707, 57.11623001098633, 58.28596878051758, 59.1226806640625, 59.380409240722656, 59.8269157409668, 62.09384536743164, 62.41743087768555, 62.52192687988281, 62.57463073730469, 62.614585876464844, 64.13490295410156, 64.1391372680664, 64.20850372314453, 65.66757202148438, 65.87053680419922, 66.1649169921875, 66.3258056640625, 66.84228515625, 69.15843200683594, 72.94913482666016, 76.36811065673828, 77.68579864501953, 78.2838363647461, 80.73773956298828, 80.949462890625, 82.71095275878906, 83.83953094482422, 84.50884246826172, 88.3492431640625, 89.2286376953125, 90.63573455810547, 90.65309143066406, 91.35397338867188, 94.04187774658203, 94.44921875, 100.0942153930664, 101.31014251708984, 102.38787841796875, 106.48152160644531, 112.5117416381836, 115.69792938232422, 118.57347106933594, 122.17619323730469, 124.31573486328125, 126.22769165039062, 126.36164855957031, 128.12017822265625, 135.64047241210938, 144.74136352539062, 157.94346618652344, 172.65652465820312, 173.34609985351562, 178.03009033203125, 195.2510528564453, 198.06227111816406, 199.17459106445312, 199.8224639892578, 230.0555419921875, 267.82562255859375, 302.54681396484375, 340, 380] 288\n"
     ]
    }
   ],
   "source": [
    "### LLaMA-7B weight-level\n",
    "hessian_trace = {'model.embed_tokens.weight': 128.11712646484375, 'model.layers.0.self_attn.q_proj.weight': 0.9730644226074219, 'model.layers.0.self_attn.k_proj.weight': 5.75832986831665, 'model.layers.0.self_attn.v_proj.weight': 267.82562255859375, 'model.layers.0.self_attn.o_proj.weight': 6.358226776123047, 'model.layers.0.mlp.gate_proj.weight': 9.388677597045898, 'model.layers.0.mlp.up_proj.weight': 5.720170497894287, 'model.layers.0.mlp.down_proj.weight': 69.15843200683594, 'model.layers.0.input_layernorm.weight': 6.631749629974365, 'model.layers.0.post_attention_layernorm.weight': 6.195370674133301, 'model.layers.1.self_attn.q_proj.weight': 32.61027526855469, 'model.layers.1.self_attn.k_proj.weight': 3.087127685546875, 'model.layers.1.self_attn.v_proj.weight': 230.0555419921875, 'model.layers.1.self_attn.o_proj.weight': 62.52192687988281, 'model.layers.1.mlp.gate_proj.weight': 38.94524383544922, 'model.layers.1.mlp.up_proj.weight': 35.18130874633789, 'model.layers.1.mlp.down_proj.weight': 199.8224639892578, 'model.layers.1.input_layernorm.weight': 11.42271614074707, 'model.layers.1.post_attention_layernorm.weight': 2.9932126998901367, 'model.layers.2.self_attn.q_proj.weight': 13.063102722167969, 'model.layers.2.self_attn.k_proj.weight': 3.46165132522583, 'model.layers.2.self_attn.v_proj.weight': 302.54681396484375, 'model.layers.2.self_attn.o_proj.weight': 172.65652465820312, 'model.layers.2.mlp.gate_proj.weight': 80.73773956298828, 'model.layers.2.mlp.up_proj.weight': 62.57463073730469, 'model.layers.2.mlp.down_proj.weight': 380, 'model.layers.2.input_layernorm.weight': 1.6050081253051758, 'model.layers.2.post_attention_layernorm.weight': 46.80244064331055, 'model.layers.3.self_attn.q_proj.weight': 6.005413055419922, 'model.layers.3.self_attn.k_proj.weight': 12.287504196166992, 'model.layers.3.self_attn.v_proj.weight': 126.36164855957031, 'model.layers.3.self_attn.o_proj.weight': 58.28596878051758, 'model.layers.3.mlp.gate_proj.weight': 
47.557952880859375, 'model.layers.3.mlp.up_proj.weight': 115.69792938232422, 'model.layers.3.mlp.down_proj.weight': 31.15878677368164, 'model.layers.3.input_layernorm.weight': 15.918931007385254, 'model.layers.3.post_attention_layernorm.weight': 12.839118957519531, 'model.layers.4.self_attn.q_proj.weight': 4.627760410308838, 'model.layers.4.self_attn.k_proj.weight': 18.468809127807617, 'model.layers.4.self_attn.v_proj.weight': 26.223430633544922, 'model.layers.4.self_attn.o_proj.weight': 30.891420364379883, 'model.layers.4.mlp.gate_proj.weight': 23.2423095703125, 'model.layers.4.mlp.up_proj.weight': 18.635950088500977, 'model.layers.4.mlp.down_proj.weight': 28.47480010986328, 'model.layers.4.input_layernorm.weight': 19.696210861206055, 'model.layers.4.post_attention_layernorm.weight': 12.018519401550293, 'model.layers.5.self_attn.q_proj.weight': 14.223091125488281, 'model.layers.5.self_attn.k_proj.weight': 3.231780529022217, 'model.layers.5.self_attn.v_proj.weight': 76.36811065673828, 'model.layers.5.self_attn.o_proj.weight': 44.5665397644043, 'model.layers.5.mlp.gate_proj.weight': 6.113771915435791, 'model.layers.5.mlp.up_proj.weight': 199.17459106445312, 'model.layers.5.mlp.down_proj.weight': 82.71095275878906, 'model.layers.5.input_layernorm.weight': 35.76387023925781, 'model.layers.5.post_attention_layernorm.weight': 8.410297393798828, 'model.layers.6.self_attn.q_proj.weight': 112.5117416381836, 'model.layers.6.self_attn.k_proj.weight': 20.700550079345703, 'model.layers.6.self_attn.v_proj.weight': 198.06227111816406, 'model.layers.6.self_attn.o_proj.weight': 178.03009033203125, 'model.layers.6.mlp.gate_proj.weight': 101.31014251708984, 'model.layers.6.mlp.up_proj.weight': 88.3492431640625, 'model.layers.6.mlp.down_proj.weight': 64.1391372680664, 'model.layers.6.input_layernorm.weight': 28.630756378173828, 'model.layers.6.post_attention_layernorm.weight': 9.915359497070312, 'model.layers.7.self_attn.q_proj.weight': 33.872005462646484, 
'model.layers.7.self_attn.k_proj.weight': 26.917009353637695, 'model.layers.7.self_attn.v_proj.weight': 173.34609985351562, 'model.layers.7.self_attn.o_proj.weight': 15.343713760375977, 'model.layers.7.mlp.gate_proj.weight': 46.84624481201172, 'model.layers.7.mlp.up_proj.weight': 7.409170150756836, 'model.layers.7.mlp.down_proj.weight': 90.63573455810547, 'model.layers.7.input_layernorm.weight': 16.19373321533203, 'model.layers.7.post_attention_layernorm.weight': 18.244468688964844, 'model.layers.8.self_attn.q_proj.weight': 36.83240509033203, 'model.layers.8.self_attn.k_proj.weight': 12.957653999328613, 'model.layers.8.self_attn.v_proj.weight': 25.95500373840332, 'model.layers.8.self_attn.o_proj.weight': 42.89203643798828, 'model.layers.8.mlp.gate_proj.weight': 23.363861083984375, 'model.layers.8.mlp.up_proj.weight': 13.634514808654785, 'model.layers.8.mlp.down_proj.weight': 128.12017822265625, 'model.layers.8.input_layernorm.weight': 23.67529296875, 'model.layers.8.post_attention_layernorm.weight': 28.217193603515625, 'model.layers.9.self_attn.q_proj.weight': 40.68372344970703, 'model.layers.9.self_attn.k_proj.weight': 20.93582534790039, 'model.layers.9.self_attn.v_proj.weight': 20.43726348876953, 'model.layers.9.self_attn.o_proj.weight': 2.8239784240722656, 'model.layers.9.mlp.gate_proj.weight': 35.94215393066406, 'model.layers.9.mlp.up_proj.weight': 64.20850372314453, 'model.layers.9.mlp.down_proj.weight': 157.94346618652344, 'model.layers.9.input_layernorm.weight': 0.5496364831924438, 'model.layers.9.post_attention_layernorm.weight': 24.884674072265625, 'model.layers.10.self_attn.q_proj.weight': 15.310616493225098, 'model.layers.10.self_attn.k_proj.weight': 47.65492630004883, 'model.layers.10.self_attn.v_proj.weight': 62.41743087768555, 'model.layers.10.self_attn.o_proj.weight': 94.44921875, 'model.layers.10.mlp.gate_proj.weight': 0.06731986999511719, 'model.layers.10.mlp.up_proj.weight': 72.94913482666016, 'model.layers.10.mlp.down_proj.weight': 
45.771446228027344, 'model.layers.10.input_layernorm.weight': 19.24749755859375, 'model.layers.10.post_attention_layernorm.weight': 9.13455581665039, 'model.layers.11.self_attn.q_proj.weight': 26.566925048828125, 'model.layers.11.self_attn.k_proj.weight': 15.202154159545898, 'model.layers.11.self_attn.v_proj.weight': 64.13490295410156, 'model.layers.11.self_attn.o_proj.weight': 66.1649169921875, 'model.layers.11.mlp.gate_proj.weight': 38.161041259765625, 'model.layers.11.mlp.up_proj.weight': 144.74136352539062, 'model.layers.11.mlp.down_proj.weight': 4.287397384643555, 'model.layers.11.input_layernorm.weight': 5.189544677734375, 'model.layers.11.post_attention_layernorm.weight': 2.200469970703125, 'model.layers.12.self_attn.q_proj.weight': 1.3900446891784668, 'model.layers.12.self_attn.k_proj.weight': 24.41895866394043, 'model.layers.12.self_attn.v_proj.weight': 195.2510528564453, 'model.layers.12.self_attn.o_proj.weight': 118.57347106933594, 'model.layers.12.mlp.gate_proj.weight': 44.17179489135742, 'model.layers.12.mlp.up_proj.weight': 50.411373138427734, 'model.layers.12.mlp.down_proj.weight': 126.22769165039062, 'model.layers.12.input_layernorm.weight': 19.60637664794922, 'model.layers.12.post_attention_layernorm.weight': 8.106983184814453, 'model.layers.13.self_attn.q_proj.weight': 48.121925354003906, 'model.layers.13.self_attn.k_proj.weight': 1.2666702270507812, 'model.layers.13.self_attn.v_proj.weight': 38.51007843017578, 'model.layers.13.self_attn.o_proj.weight': 19.222043991088867, 'model.layers.13.mlp.gate_proj.weight': 22.64798355102539, 'model.layers.13.mlp.up_proj.weight': 33.717071533203125, 'model.layers.13.mlp.down_proj.weight': 94.04187774658203, 'model.layers.13.input_layernorm.weight': 1.0167909860610962, 'model.layers.13.post_attention_layernorm.weight': 1.2173047065734863, 'model.layers.14.self_attn.q_proj.weight': 5.421915054321289, 'model.layers.14.self_attn.k_proj.weight': 27.10348892211914, 'model.layers.14.self_attn.v_proj.weight': 
91.35397338867188, 'model.layers.14.self_attn.o_proj.weight': 33.31367874145508, 'model.layers.14.mlp.gate_proj.weight': 28.608022689819336, 'model.layers.14.mlp.up_proj.weight': 31.337360382080078, 'model.layers.14.mlp.down_proj.weight': 5.009794235229492, 'model.layers.14.input_layernorm.weight': 0.9636175632476807, 'model.layers.14.post_attention_layernorm.weight': 0.47826266288757324, 'model.layers.15.self_attn.q_proj.weight': 9.08072566986084, 'model.layers.15.self_attn.k_proj.weight': 20.989229202270508, 'model.layers.15.self_attn.v_proj.weight': 57.11623001098633, 'model.layers.15.self_attn.o_proj.weight': 5.311587333679199, 'model.layers.15.mlp.gate_proj.weight': 19.28093719482422, 'model.layers.15.mlp.up_proj.weight': 2.2559266090393066, 'model.layers.15.mlp.down_proj.weight': 47.119606018066406, 'model.layers.15.input_layernorm.weight': 9.245084762573242, 'model.layers.15.post_attention_layernorm.weight': 0.714079737663269, 'model.layers.16.self_attn.q_proj.weight': 25.530439376831055, 'model.layers.16.self_attn.k_proj.weight': 16.02246856689453, 'model.layers.16.self_attn.v_proj.weight': 124.31573486328125, 'model.layers.16.self_attn.o_proj.weight': 65.66757202148438, 'model.layers.16.mlp.gate_proj.weight': 89.2286376953125, 'model.layers.16.mlp.up_proj.weight': 78.2838363647461, 'model.layers.16.mlp.down_proj.weight': 122.17619323730469, 'model.layers.16.input_layernorm.weight': 3.9033946990966797, 'model.layers.16.post_attention_layernorm.weight': 5.007850646972656, 'model.layers.17.self_attn.q_proj.weight': 30.791297912597656, 'model.layers.17.self_attn.k_proj.weight': 6.001221656799316, 'model.layers.17.self_attn.v_proj.weight': 50.58659362792969, 'model.layers.17.self_attn.o_proj.weight': 65.87053680419922, 'model.layers.17.mlp.gate_proj.weight': 37.348777770996094, 'model.layers.17.mlp.up_proj.weight': 37.3125114440918, 'model.layers.17.mlp.down_proj.weight': 135.64047241210938, 'model.layers.17.input_layernorm.weight': 5.8337860107421875, 
'model.layers.17.post_attention_layernorm.weight': 12.130170822143555, 'model.layers.18.self_attn.q_proj.weight': 6.057760715484619, 'model.layers.18.self_attn.k_proj.weight': 8.957040786743164, 'model.layers.18.self_attn.v_proj.weight': 90.65309143066406, 'model.layers.18.self_attn.o_proj.weight': 49.07258987426758, 'model.layers.18.mlp.gate_proj.weight': 53.93105697631836, 'model.layers.18.mlp.up_proj.weight': 46.59817123413086, 'model.layers.18.mlp.down_proj.weight': 32.15882873535156, 'model.layers.18.input_layernorm.weight': 1.3362815380096436, 'model.layers.18.post_attention_layernorm.weight': 4.353269100189209, 'model.layers.19.self_attn.q_proj.weight': 0.4053499698638916, 'model.layers.19.self_attn.k_proj.weight': 25.439279556274414, 'model.layers.19.self_attn.v_proj.weight': 62.09384536743164, 'model.layers.19.self_attn.o_proj.weight': 59.380409240722656, 'model.layers.19.mlp.gate_proj.weight': 66.84228515625, 'model.layers.19.mlp.up_proj.weight': 42.51271438598633, 'model.layers.19.mlp.down_proj.weight': 0.2137746810913086, 'model.layers.19.input_layernorm.weight': 5.700724124908447, 'model.layers.19.post_attention_layernorm.weight': 5.286497116088867, 'model.layers.20.self_attn.q_proj.weight': 5.815807819366455, 'model.layers.20.self_attn.k_proj.weight': 3.2934980392456055, 'model.layers.20.self_attn.v_proj.weight': 30.229206085205078, 'model.layers.20.self_attn.o_proj.weight': 23.19651222229004, 'model.layers.20.mlp.gate_proj.weight': 6.808246612548828, 'model.layers.20.mlp.up_proj.weight': 30.613590240478516, 'model.layers.20.mlp.down_proj.weight': 54.0933723449707, 'model.layers.20.input_layernorm.weight': 1.9205161333084106, 'model.layers.20.post_attention_layernorm.weight': 3.621851682662964, 'model.layers.21.self_attn.q_proj.weight': 1.084111213684082, 'model.layers.21.self_attn.k_proj.weight': 0.011959314346313477, 'model.layers.21.self_attn.v_proj.weight': 19.965911865234375, 'model.layers.21.self_attn.o_proj.weight': 4.297876358032227, 
'model.layers.21.mlp.gate_proj.weight': 32.929927825927734, 'model.layers.21.mlp.up_proj.weight': 38.852149963378906, 'model.layers.21.mlp.down_proj.weight': 102.38787841796875, 'model.layers.21.input_layernorm.weight': 1.2508561611175537, 'model.layers.21.post_attention_layernorm.weight': 0.9765117168426514, 'model.layers.22.self_attn.q_proj.weight': 12.39744758605957, 'model.layers.22.self_attn.k_proj.weight': 27.451740264892578, 'model.layers.22.self_attn.v_proj.weight': 11.398211479187012, 'model.layers.22.self_attn.o_proj.weight': 11.232147216796875, 'model.layers.22.mlp.gate_proj.weight': 4.822690963745117, 'model.layers.22.mlp.up_proj.weight': 13.463789939880371, 'model.layers.22.mlp.down_proj.weight': 52.949893951416016, 'model.layers.22.input_layernorm.weight': 0.16745203733444214, 'model.layers.22.post_attention_layernorm.weight': 3.0487053394317627, 'model.layers.23.self_attn.q_proj.weight': 10.201648712158203, 'model.layers.23.self_attn.k_proj.weight': 0.39299440383911133, 'model.layers.23.self_attn.v_proj.weight': 37.769798278808594, 'model.layers.23.self_attn.o_proj.weight': 66.3258056640625, 'model.layers.23.mlp.gate_proj.weight': 84.50884246826172, 'model.layers.23.mlp.up_proj.weight': 7.277076721191406, 'model.layers.23.mlp.down_proj.weight': 77.68579864501953, 'model.layers.23.input_layernorm.weight': 3.9064760208129883, 'model.layers.23.post_attention_layernorm.weight': 0.2029775232076645, 'model.layers.24.self_attn.q_proj.weight': 12.511200904846191, 'model.layers.24.self_attn.k_proj.weight': 5.5696563720703125, 'model.layers.24.self_attn.v_proj.weight': 15.777772903442383, 'model.layers.24.self_attn.o_proj.weight': 18.261016845703125, 'model.layers.24.mlp.gate_proj.weight': 22.595224380493164, 'model.layers.24.mlp.up_proj.weight': 38.35172653198242, 'model.layers.24.mlp.down_proj.weight': 32.59394073486328, 'model.layers.24.input_layernorm.weight': 0.5317487120628357, 'model.layers.24.post_attention_layernorm.weight': 5.813882350921631, 
'model.layers.25.self_attn.q_proj.weight': 0.7791382670402527, 'model.layers.25.self_attn.k_proj.weight': 10.64183235168457, 'model.layers.25.self_attn.v_proj.weight': 27.06773567199707, 'model.layers.25.self_attn.o_proj.weight': 2.677760362625122, 'model.layers.25.mlp.gate_proj.weight': 20.254562377929688, 'model.layers.25.mlp.up_proj.weight': 39.62143325805664, 'model.layers.25.mlp.down_proj.weight': 42.814517974853516, 'model.layers.25.input_layernorm.weight': 1.402695655822754, 'model.layers.25.post_attention_layernorm.weight': 1.05050790309906, 'model.layers.26.self_attn.q_proj.weight': 18.389787673950195, 'model.layers.26.self_attn.k_proj.weight': 1.519516944885254, 'model.layers.26.self_attn.v_proj.weight': 53.071800231933594, 'model.layers.26.self_attn.o_proj.weight': 5.7049736976623535, 'model.layers.26.mlp.gate_proj.weight': 24.868253707885742, 'model.layers.26.mlp.up_proj.weight': 20.149593353271484, 'model.layers.26.mlp.down_proj.weight': 9.35295295715332, 'model.layers.26.input_layernorm.weight': 1.8879778385162354, 'model.layers.26.post_attention_layernorm.weight': 3.5695607662200928, 'model.layers.27.self_attn.q_proj.weight': 15.680398941040039, 'model.layers.27.self_attn.k_proj.weight': 14.111237525939941, 'model.layers.27.self_attn.v_proj.weight': 3.434666156768799, 'model.layers.27.self_attn.o_proj.weight': 10.889530181884766, 'model.layers.27.mlp.gate_proj.weight': 19.022090911865234, 'model.layers.27.mlp.up_proj.weight': 13.062423706054688, 'model.layers.27.mlp.down_proj.weight': 38.12520980834961, 'model.layers.27.input_layernorm.weight': 0.24941480159759521, 'model.layers.27.post_attention_layernorm.weight': 11.1268949508667, 'model.layers.28.self_attn.q_proj.weight': 18.237247467041016, 'model.layers.28.self_attn.k_proj.weight': 1.6528589725494385, 'model.layers.28.self_attn.v_proj.weight': 34.114871978759766, 'model.layers.28.self_attn.o_proj.weight': 14.817950248718262, 'model.layers.28.mlp.gate_proj.weight': 2.655719757080078, 
'model.layers.28.mlp.up_proj.weight': 83.83953094482422, 'model.layers.28.mlp.down_proj.weight': 18.02721405029297, 'model.layers.28.input_layernorm.weight': 2.8059041500091553, 'model.layers.28.post_attention_layernorm.weight': 4.0900702476501465, 'model.layers.29.self_attn.q_proj.weight': 6.655066967010498, 'model.layers.29.self_attn.k_proj.weight': 10.840666770935059, 'model.layers.29.self_attn.v_proj.weight': 4.528952121734619, 'model.layers.29.self_attn.o_proj.weight': 9.182613372802734, 'model.layers.29.mlp.gate_proj.weight': 59.8269157409668, 'model.layers.29.mlp.up_proj.weight': 59.1226806640625, 'model.layers.29.mlp.down_proj.weight': 80.949462890625, 'model.layers.29.input_layernorm.weight': 0.5516930818557739, 'model.layers.29.post_attention_layernorm.weight': 6.145689487457275, 'model.layers.30.self_attn.q_proj.weight': 17.526729583740234, 'model.layers.30.self_attn.k_proj.weight': 9.665191650390625, 'model.layers.30.self_attn.v_proj.weight': 46.41564178466797, 'model.layers.30.self_attn.o_proj.weight': 17.27133560180664, 'model.layers.30.mlp.gate_proj.weight': 106.48152160644531, 'model.layers.30.mlp.up_proj.weight': 62.614585876464844, 'model.layers.30.mlp.down_proj.weight': 27.564010620117188, 'model.layers.30.input_layernorm.weight': 1.5891705751419067, 'model.layers.30.post_attention_layernorm.weight': 37.73549270629883, 'model.layers.31.self_attn.q_proj.weight': 6.382080078125, 'model.layers.31.self_attn.k_proj.weight': 9.235940933227539, 'model.layers.31.self_attn.v_proj.weight': 15.959426879882812, 'model.layers.31.self_attn.o_proj.weight': 20.189496994018555, 'model.layers.31.mlp.gate_proj.weight': 16.908679962158203, 'model.layers.31.mlp.up_proj.weight': 100.0942153930664, 'model.layers.31.mlp.down_proj.weight': 340, 'model.layers.31.input_layernorm.weight': 14.88443374633789, 'model.layers.31.post_attention_layernorm.weight': 1.154252529144287, 'model.norm.weight': 5.090324401855469, 'lm_head.weight': 5700.6513671875}\n",
    "# Weight-level sensitivity: group the Hessian traces by decoder layer and\n",
    "# collect every per-weight trace so they can be ranked globally.\n",
    "NUM_LAYERS = 32       # layer indices outside [0, NUM_LAYERS) are skipped\n",
    "sensitivity = []      # one {sub-module name: trace} dict per layer, in encounter order\n",
    "layer_traces = {}     # traces of the layer currently being collected\n",
    "clayer = 0            # index of the layer currently being collected\n",
    "total_weight = []     # every '.weight' trace taken from the kept layers\n",
    "for name, trace in hessian_trace.items():\n",
    "    if not name.startswith(\"model.layers\"):\n",
    "        continue\n",
    "    parts = name.split('.')\n",
    "    layer = int(parts[2])\n",
    "    # Out-of-range guard: either bound being violated is enough to skip.\n",
    "    if layer < 0 or layer >= NUM_LAYERS:\n",
    "        continue\n",
    "    # Relies on hessian_trace listing layers in ascending order: a larger\n",
    "    # index closes the current layer's dict and starts a fresh one.\n",
    "    if clayer < layer:\n",
    "        clayer = layer\n",
    "        sensitivity.append(layer_traces)\n",
    "        layer_traces = {}\n",
    "    subname = \".\".join(parts[3:])\n",
    "    if subname.endswith(\".weight\"):\n",
    "        # Key is the sub-module path without the trailing '.weight'.\n",
    "        layer_traces[subname[:-len(\".weight\")]] = trace\n",
    "        total_weight.append(trace)\n",
    "sensitivity.append(layer_traces)  # flush the last collected layer\n",
    "total_weight = sorted(total_weight)\n",
    "print(total_weight, len(total_weight))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "da846e53-7222-42ae-89bb-09b27148fd47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[291.7909917831421, 366.22037506103516, 314.3855085372925, 559.4782600402832, 464.1611452102661, 601.2009010314941, 681.5765066146851, 470.96419763565063, 775.2183933258057, 875.9223899841309, 799.133394241333, 760.97434425354, 695.8224081993103, 309.4704909324646, 350.3650779724121, 461.700626373291, 346.8023681640625, 704.4676382541656, 557.0734012126923, 627.6702337265015, 422.3033332824707, 383.53755474090576, 454.09480595588684, 573.0809116363525, 520.4448375701904, 256.17217767238617, 277.9087266921997, 463.391752243042, 604.0778455734253, 761.2426834106445, 1107.1395645141602, 1528.82173037529]\n"
     ]
    }
   ],
   "source": [
    "### Baichuan-7B layer-level \n",
    "hessian_trace = {'model.embed_tokens.weight': 88.54046630859375, 'model.layers.0.self_attn.W_pack.weight': 149.6170196533203, 'model.layers.0.self_attn.o_proj.weight': 25.089771270751953, 'model.layers.0.mlp.gate_proj.weight': 2.080451011657715, 'model.layers.0.mlp.down_proj.weight': 47.47933578491211, 'model.layers.0.mlp.up_proj.weight': 9.186905860900879, 'model.layers.0.input_layernorm.weight': 4.974452018737793, 'model.layers.0.post_attention_layernorm.weight': 53.36305618286133, 'model.layers.1.self_attn.W_pack.weight': 117.78713989257812, 'model.layers.1.self_attn.o_proj.weight': 102.39787292480469, 'model.layers.1.mlp.gate_proj.weight': 55.00774383544922, 'model.layers.1.mlp.down_proj.weight': 24.53706169128418, 'model.layers.1.mlp.up_proj.weight': 49.37354278564453, 'model.layers.1.input_layernorm.weight': 0.14952468872070312, 'model.layers.1.post_attention_layernorm.weight': 16.96748924255371, 'model.layers.2.self_attn.W_pack.weight': 206.29974365234375, 'model.layers.2.self_attn.o_proj.weight': 30.69840431213379, 'model.layers.2.mlp.gate_proj.weight': 15.665666580200195, 'model.layers.2.mlp.down_proj.weight': 30.206314086914062, 'model.layers.2.mlp.up_proj.weight': 1.7222919464111328, 'model.layers.2.input_layernorm.weight': 12.663371086120605, 'model.layers.2.post_attention_layernorm.weight': 17.129716873168945, 'model.layers.3.self_attn.W_pack.weight': 352.04913330078125, 'model.layers.3.self_attn.o_proj.weight': 33.542808532714844, 'model.layers.3.mlp.gate_proj.weight': 18.709897994995117, 'model.layers.3.mlp.down_proj.weight': 115.19551086425781, 'model.layers.3.mlp.up_proj.weight': 26.916963577270508, 'model.layers.3.input_layernorm.weight': 4.112678527832031, 'model.layers.3.post_attention_layernorm.weight': 8.95126724243164, 'model.layers.4.self_attn.W_pack.weight': 310.98419189453125, 'model.layers.4.self_attn.o_proj.weight': 49.46668243408203, 'model.layers.4.mlp.gate_proj.weight': 16.731216430664062, 'model.layers.4.mlp.down_proj.weight': 
20.60802459716797, 'model.layers.4.mlp.up_proj.weight': 24.02362823486328, 'model.layers.4.input_layernorm.weight': 33.72087097167969, 'model.layers.4.post_attention_layernorm.weight': 8.626530647277832, 'model.layers.5.self_attn.W_pack.weight': 373.24517822265625, 'model.layers.5.self_attn.o_proj.weight': 10.94842529296875, 'model.layers.5.mlp.gate_proj.weight': 64.949462890625, 'model.layers.5.mlp.down_proj.weight': 79.34925842285156, 'model.layers.5.mlp.up_proj.weight': 56.05812454223633, 'model.layers.5.input_layernorm.weight': 9.365440368652344, 'model.layers.5.post_attention_layernorm.weight': 7.285011291503906, 'model.layers.6.self_attn.W_pack.weight': 235.14219665527344, 'model.layers.6.self_attn.o_proj.weight': 178.3316192626953, 'model.layers.6.mlp.gate_proj.weight': 59.52509307861328, 'model.layers.6.mlp.down_proj.weight': 151.09246826171875, 'model.layers.6.mlp.up_proj.weight': 36.25011444091797, 'model.layers.6.input_layernorm.weight': 8.410454750061035, 'model.layers.6.post_attention_layernorm.weight': 12.824560165405273, 'model.layers.7.self_attn.W_pack.weight': 43.58650207519531, 'model.layers.7.self_attn.o_proj.weight': 49.27372360229492, 'model.layers.7.mlp.gate_proj.weight': 57.944602966308594, 'model.layers.7.mlp.down_proj.weight': 298.6398620605469, 'model.layers.7.mlp.up_proj.weight': 0.5254945755004883, 'model.layers.7.input_layernorm.weight': 2.499436855316162, 'model.layers.7.post_attention_layernorm.weight': 18.49457550048828, 'model.layers.8.self_attn.W_pack.weight': 48.906715393066406, 'model.layers.8.self_attn.o_proj.weight': 179.1116485595703, 'model.layers.8.mlp.gate_proj.weight': 75.8370132446289, 'model.layers.8.mlp.down_proj.weight': 230.78240966796875, 'model.layers.8.mlp.up_proj.weight': 185.375, 'model.layers.8.input_layernorm.weight': 44.37461853027344, 'model.layers.8.post_attention_layernorm.weight': 10.830987930297852, 'model.layers.9.self_attn.W_pack.weight': 82.59011840820312, 'model.layers.9.self_attn.o_proj.weight': 
269.88671875, 'model.layers.9.mlp.gate_proj.weight': 68.34054565429688, 'model.layers.9.mlp.down_proj.weight': 70.99658203125, 'model.layers.9.mlp.up_proj.weight': 328.7184753417969, 'model.layers.9.input_layernorm.weight': 27.70631217956543, 'model.layers.9.post_attention_layernorm.weight': 27.683637619018555, 'model.layers.10.self_attn.W_pack.weight': 247.97259521484375, 'model.layers.10.self_attn.o_proj.weight': 124.3583755493164, 'model.layers.10.mlp.gate_proj.weight': 33.600921630859375, 'model.layers.10.mlp.down_proj.weight': 221.45025634765625, 'model.layers.10.mlp.up_proj.weight': 157.110595703125, 'model.layers.10.input_layernorm.weight': 7.402688980102539, 'model.layers.10.post_attention_layernorm.weight': 7.2379608154296875, 'model.layers.11.self_attn.W_pack.weight': 225.04556274414062, 'model.layers.11.self_attn.o_proj.weight': 51.29396057128906, 'model.layers.11.mlp.gate_proj.weight': 172.6842803955078, 'model.layers.11.mlp.down_proj.weight': 76.42884826660156, 'model.layers.11.mlp.up_proj.weight': 205.76620483398438, 'model.layers.11.input_layernorm.weight': 2.917478561401367, 'model.layers.11.post_attention_layernorm.weight': 26.838008880615234, 'model.layers.12.self_attn.W_pack.weight': 84.38399505615234, 'model.layers.12.self_attn.o_proj.weight': 203.1007843017578, 'model.layers.12.mlp.gate_proj.weight': 33.74456024169922, 'model.layers.12.mlp.down_proj.weight': 269.46881103515625, 'model.layers.12.mlp.up_proj.weight': 61.78874206542969, 'model.layers.12.input_layernorm.weight': 6.153356075286865, 'model.layers.12.post_attention_layernorm.weight': 37.182159423828125, 'model.layers.13.self_attn.W_pack.weight': 35.592010498046875, 'model.layers.13.self_attn.o_proj.weight': 41.04111862182617, 'model.layers.13.mlp.gate_proj.weight': 30.834117889404297, 'model.layers.13.mlp.down_proj.weight': 127.48301696777344, 'model.layers.13.mlp.up_proj.weight': 46.733009338378906, 'model.layers.13.input_layernorm.weight': 7.034879207611084, 
'model.layers.13.post_attention_layernorm.weight': 20.752338409423828, 'model.layers.14.self_attn.W_pack.weight': 18.493345260620117, 'model.layers.14.self_attn.o_proj.weight': 0.13173294067382812, 'model.layers.14.mlp.gate_proj.weight': 69.10546875, 'model.layers.14.mlp.down_proj.weight': 65.01895904541016, 'model.layers.14.mlp.up_proj.weight': 120.18651580810547, 'model.layers.14.input_layernorm.weight': 56.12666320800781, 'model.layers.14.post_attention_layernorm.weight': 21.302392959594727, 'model.layers.15.self_attn.W_pack.weight': 38.476165771484375, 'model.layers.15.self_attn.o_proj.weight': 5.443084716796875, 'model.layers.15.mlp.gate_proj.weight': 49.23161315917969, 'model.layers.15.mlp.down_proj.weight': 145.4043426513672, 'model.layers.15.mlp.up_proj.weight': 191.48440551757812, 'model.layers.15.input_layernorm.weight': 18.631195068359375, 'model.layers.15.post_attention_layernorm.weight': 13.02981948852539, 'model.layers.16.self_attn.W_pack.weight': 35.01117706298828, 'model.layers.16.self_attn.o_proj.weight': 124.06249237060547, 'model.layers.16.mlp.gate_proj.weight': 29.14255142211914, 'model.layers.16.mlp.down_proj.weight': 34.8109245300293, 'model.layers.16.mlp.up_proj.weight': 49.66072082519531, 'model.layers.16.input_layernorm.weight': 42.008750915527344, 'model.layers.16.post_attention_layernorm.weight': 32.105751037597656, 'model.layers.17.self_attn.W_pack.weight': 245.72438049316406, 'model.layers.17.self_attn.o_proj.weight': 129.53009033203125, 'model.layers.17.mlp.gate_proj.weight': 39.792694091796875, 'model.layers.17.mlp.down_proj.weight': 191.07957458496094, 'model.layers.17.mlp.up_proj.weight': 81.77481079101562, 'model.layers.17.input_layernorm.weight': 15.26624584197998, 'model.layers.17.post_attention_layernorm.weight': 1.299842119216919, 'model.layers.18.self_attn.W_pack.weight': 299.31463623046875, 'model.layers.18.self_attn.o_proj.weight': 71.55064392089844, 'model.layers.18.mlp.gate_proj.weight': 0.9105682373046875, 
'model.layers.18.mlp.down_proj.weight': 58.41600036621094, 'model.layers.18.mlp.up_proj.weight': 119.93107604980469, 'model.layers.18.input_layernorm.weight': 4.7208452224731445, 'model.layers.18.post_attention_layernorm.weight': 2.229631185531616, 'model.layers.19.self_attn.W_pack.weight': 262.5270690917969, 'model.layers.19.self_attn.o_proj.weight': 12.222070693969727, 'model.layers.19.mlp.gate_proj.weight': 94.86344909667969, 'model.layers.19.mlp.down_proj.weight': 126.95451354980469, 'model.layers.19.mlp.up_proj.weight': 101.88811492919922, 'model.layers.19.input_layernorm.weight': 14.57505989074707, 'model.layers.19.post_attention_layernorm.weight': 14.6399564743042, 'model.layers.20.self_attn.W_pack.weight': 75.33004760742188, 'model.layers.20.self_attn.o_proj.weight': 78.65751647949219, 'model.layers.20.mlp.gate_proj.weight': 150.36105346679688, 'model.layers.20.mlp.down_proj.weight': 65.15847778320312, 'model.layers.20.mlp.up_proj.weight': 17.968124389648438, 'model.layers.20.input_layernorm.weight': 16.402969360351562, 'model.layers.20.post_attention_layernorm.weight': 18.42514419555664, 'model.layers.21.self_attn.W_pack.weight': 31.854175567626953, 'model.layers.21.self_attn.o_proj.weight': 36.012596130371094, 'model.layers.21.mlp.gate_proj.weight': 77.72156524658203, 'model.layers.21.mlp.down_proj.weight': 74.58148193359375, 'model.layers.21.mlp.up_proj.weight': 118.97840118408203, 'model.layers.21.input_layernorm.weight': 33.38425064086914, 'model.layers.21.post_attention_layernorm.weight': 11.005084037780762, 'model.layers.22.self_attn.W_pack.weight': 85.3742446899414, 'model.layers.22.self_attn.o_proj.weight': 50.760379791259766, 'model.layers.22.mlp.gate_proj.weight': 177.3429412841797, 'model.layers.22.mlp.down_proj.weight': 23.955474853515625, 'model.layers.22.mlp.up_proj.weight': 96.12214660644531, 'model.layers.22.input_layernorm.weight': 3.831782579421997, 'model.layers.22.post_attention_layernorm.weight': 16.707836151123047, 
'model.layers.23.self_attn.W_pack.weight': 83.10568237304688, 'model.layers.23.self_attn.o_proj.weight': 138.30015563964844, 'model.layers.23.mlp.gate_proj.weight': 69.77306365966797, 'model.layers.23.mlp.down_proj.weight': 162.91802978515625, 'model.layers.23.mlp.up_proj.weight': 74.60517883300781, 'model.layers.23.input_layernorm.weight': 27.26572608947754, 'model.layers.23.post_attention_layernorm.weight': 17.113075256347656, 'model.layers.24.self_attn.W_pack.weight': 83.70370483398438, 'model.layers.24.self_attn.o_proj.weight': 72.5811538696289, 'model.layers.24.mlp.gate_proj.weight': 143.89535522460938, 'model.layers.24.mlp.down_proj.weight': 145.53323364257812, 'model.layers.24.mlp.up_proj.weight': 52.176673889160156, 'model.layers.24.input_layernorm.weight': 9.975500106811523, 'model.layers.24.post_attention_layernorm.weight': 12.579216003417969, 'model.layers.25.self_attn.W_pack.weight': 106.87451171875, 'model.layers.25.self_attn.o_proj.weight': 16.385643005371094, 'model.layers.25.mlp.gate_proj.weight': 95.89348602294922, 'model.layers.25.mlp.down_proj.weight': 7.981260299682617, 'model.layers.25.mlp.up_proj.weight': 26.457712173461914, 'model.layers.25.input_layernorm.weight': 0.8130515813827515, 'model.layers.25.post_attention_layernorm.weight': 1.7665128707885742, 'model.layers.26.self_attn.W_pack.weight': 53.167076110839844, 'model.layers.26.self_attn.o_proj.weight': 1.6349420547485352, 'model.layers.26.mlp.gate_proj.weight': 24.83310317993164, 'model.layers.26.mlp.down_proj.weight': 112.59498596191406, 'model.layers.26.mlp.up_proj.weight': 45.057525634765625, 'model.layers.26.input_layernorm.weight': 21.315677642822266, 'model.layers.26.post_attention_layernorm.weight': 19.305416107177734, 'model.layers.27.self_attn.W_pack.weight': 41.60322952270508, 'model.layers.27.self_attn.o_proj.weight': 60.080299377441406, 'model.layers.27.mlp.gate_proj.weight': 76.72405242919922, 'model.layers.27.mlp.down_proj.weight': 142.64828491210938, 
'model.layers.27.mlp.up_proj.weight': 89.41703033447266, 'model.layers.27.input_layernorm.weight': 27.210872650146484, 'model.layers.27.post_attention_layernorm.weight': 25.707983016967773, 'model.layers.28.self_attn.W_pack.weight': 265.5186767578125, 'model.layers.28.self_attn.o_proj.weight': 28.45724868774414, 'model.layers.28.mlp.gate_proj.weight': 54.76946258544922, 'model.layers.28.mlp.down_proj.weight': 144.5653839111328, 'model.layers.28.mlp.up_proj.weight': 75.11656188964844, 'model.layers.28.input_layernorm.weight': 7.366549491882324, 'model.layers.28.post_attention_layernorm.weight': 28.28396224975586, 'model.layers.29.self_attn.W_pack.weight': 166.41751098632812, 'model.layers.29.self_attn.o_proj.weight': 69.56593322753906, 'model.layers.29.mlp.gate_proj.weight': 218.32382202148438, 'model.layers.29.mlp.down_proj.weight': 156.04196166992188, 'model.layers.29.mlp.up_proj.weight': 48.39146423339844, 'model.layers.29.input_layernorm.weight': 46.71441650390625, 'model.layers.29.post_attention_layernorm.weight': 55.787574768066406, 'model.layers.30.self_attn.W_pack.weight': 112.06259155273438, 'model.layers.30.self_attn.o_proj.weight': 46.64891052246094, 'model.layers.30.mlp.gate_proj.weight': 105.79398345947266, 'model.layers.30.mlp.down_proj.weight': 460, 'model.layers.30.mlp.up_proj.weight': 226.6422576904297, 'model.layers.30.input_layernorm.weight': 80.7092514038086, 'model.layers.30.post_attention_layernorm.weight': 75.2825698852539, 'model.layers.31.self_attn.W_pack.weight': 420, 'model.layers.31.self_attn.o_proj.weight': 440, 'model.layers.31.mlp.gate_proj.weight': 53.389801025390625, 'model.layers.31.mlp.down_proj.weight': 400, 'model.layers.31.mlp.up_proj.weight': 141.4102783203125, 'model.layers.31.input_layernorm.weight': 70.21380615234375, 'model.layers.31.post_attention_layernorm.weight': 3.807844877243042, 'model.norm.weight': 0.12117958068847656, 'lm_head.weight': 2355.181884765625}\n",
    "# Layer-level sensitivity: add every trace whose parameter name starts with\n",
    "# model.layers into the slot of the layer index found in the name.\n",
    "sensitivity = [0 for _ in range(32)]\n",
    "for param_name in hessian_trace:\n",
    "    if not param_name.startswith(\"model.layers\"):\n",
    "        continue\n",
    "    # The third dot-separated field of the name is the layer index.\n",
    "    layer_idx = int(param_name.split('.')[2])\n",
    "    sensitivity[layer_idx] = sensitivity[layer_idx] + hessian_trace[param_name]\n",
    "print(sensitivity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cfee5cbc-bca9-42bb-9402-9d6caf29ecef",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.13173294067382812, 0.14952468872070312, 0.5254945755004883, 0.8130515813827515, 0.9105682373046875, 1.299842119216919, 1.6349420547485352, 1.7222919464111328, 1.7665128707885742, 2.080451011657715, 2.229631185531616, 2.499436855316162, 2.917478561401367, 3.807844877243042, 3.831782579421997, 4.112678527832031, 4.7208452224731445, 4.974452018737793, 5.443084716796875, 6.153356075286865, 7.034879207611084, 7.2379608154296875, 7.285011291503906, 7.366549491882324, 7.402688980102539, 7.981260299682617, 8.410454750061035, 8.626530647277832, 8.95126724243164, 9.186905860900879, 9.365440368652344, 9.975500106811523, 10.830987930297852, 10.94842529296875, 11.005084037780762, 12.222070693969727, 12.579216003417969, 12.663371086120605, 12.824560165405273, 13.02981948852539, 14.57505989074707, 14.6399564743042, 15.26624584197998, 15.665666580200195, 16.385643005371094, 16.402969360351562, 16.707836151123047, 16.731216430664062, 16.96748924255371, 17.113075256347656, 17.129716873168945, 17.968124389648438, 18.42514419555664, 18.493345260620117, 18.49457550048828, 18.631195068359375, 18.709897994995117, 19.305416107177734, 20.60802459716797, 20.752338409423828, 21.302392959594727, 21.315677642822266, 23.955474853515625, 24.02362823486328, 24.53706169128418, 24.83310317993164, 25.089771270751953, 25.707983016967773, 26.457712173461914, 26.838008880615234, 26.916963577270508, 27.210872650146484, 27.26572608947754, 27.683637619018555, 27.70631217956543, 28.28396224975586, 28.45724868774414, 29.14255142211914, 30.206314086914062, 30.69840431213379, 30.834117889404297, 31.854175567626953, 32.105751037597656, 33.38425064086914, 33.542808532714844, 33.600921630859375, 33.72087097167969, 33.74456024169922, 34.8109245300293, 35.01117706298828, 35.592010498046875, 36.012596130371094, 36.25011444091797, 37.182159423828125, 38.476165771484375, 39.792694091796875, 41.04111862182617, 41.60322952270508, 42.008750915527344, 43.58650207519531, 44.37461853027344, 45.057525634765625, 
46.64891052246094, 46.71441650390625, 46.733009338378906, 47.47933578491211, 48.39146423339844, 48.906715393066406, 49.23161315917969, 49.27372360229492, 49.37354278564453, 49.46668243408203, 49.66072082519531, 50.760379791259766, 51.29396057128906, 52.176673889160156, 53.167076110839844, 53.36305618286133, 53.389801025390625, 54.76946258544922, 55.00774383544922, 55.787574768066406, 56.05812454223633, 56.12666320800781, 57.944602966308594, 58.41600036621094, 59.52509307861328, 60.080299377441406, 61.78874206542969, 64.949462890625, 65.01895904541016, 65.15847778320312, 68.34054565429688, 69.10546875, 69.56593322753906, 69.77306365966797, 70.21380615234375, 70.99658203125, 71.55064392089844, 72.5811538696289, 74.58148193359375, 74.60517883300781, 75.11656188964844, 75.2825698852539, 75.33004760742188, 75.8370132446289, 76.42884826660156, 76.72405242919922, 77.72156524658203, 78.65751647949219, 79.34925842285156, 80.7092514038086, 81.77481079101562, 82.59011840820312, 83.10568237304688, 83.70370483398438, 84.38399505615234, 85.3742446899414, 89.41703033447266, 94.86344909667969, 95.89348602294922, 96.12214660644531, 101.88811492919922, 102.39787292480469, 105.79398345947266, 106.87451171875, 112.06259155273438, 112.59498596191406, 115.19551086425781, 117.78713989257812, 118.97840118408203, 119.93107604980469, 120.18651580810547, 124.06249237060547, 124.3583755493164, 126.95451354980469, 127.48301696777344, 129.53009033203125, 138.30015563964844, 141.4102783203125, 142.64828491210938, 143.89535522460938, 144.5653839111328, 145.4043426513672, 145.53323364257812, 149.6170196533203, 150.36105346679688, 151.09246826171875, 156.04196166992188, 157.110595703125, 162.91802978515625, 166.41751098632812, 172.6842803955078, 177.3429412841797, 178.3316192626953, 179.1116485595703, 185.375, 191.07957458496094, 191.48440551757812, 203.1007843017578, 205.76620483398438, 206.29974365234375, 218.32382202148438, 221.45025634765625, 225.04556274414062, 226.6422576904297, 
230.78240966796875, 235.14219665527344, 245.72438049316406, 247.97259521484375, 262.5270690917969, 265.5186767578125, 269.46881103515625, 269.88671875, 298.6398620605469, 299.31463623046875, 310.98419189453125, 328.7184753417969, 352.04913330078125, 373.24517822265625, 1069.763671875, 3170.7841796875, 5375.14111328125, 147758.609375] 224\n"
     ]
    }
   ],
   "source": [
    "### Baichuan-7B weight-level \n",
    "hessian_trace = {'model.embed_tokens.weight': 88.54046630859375, 'model.layers.0.self_attn.W_pack.weight': 149.6170196533203, 'model.layers.0.self_attn.o_proj.weight': 25.089771270751953, 'model.layers.0.mlp.gate_proj.weight': 2.080451011657715, 'model.layers.0.mlp.down_proj.weight': 47.47933578491211, 'model.layers.0.mlp.up_proj.weight': 9.186905860900879, 'model.layers.0.input_layernorm.weight': 4.974452018737793, 'model.layers.0.post_attention_layernorm.weight': 53.36305618286133, 'model.layers.1.self_attn.W_pack.weight': 117.78713989257812, 'model.layers.1.self_attn.o_proj.weight': 102.39787292480469, 'model.layers.1.mlp.gate_proj.weight': 55.00774383544922, 'model.layers.1.mlp.down_proj.weight': 24.53706169128418, 'model.layers.1.mlp.up_proj.weight': 49.37354278564453, 'model.layers.1.input_layernorm.weight': 0.14952468872070312, 'model.layers.1.post_attention_layernorm.weight': 16.96748924255371, 'model.layers.2.self_attn.W_pack.weight': 206.29974365234375, 'model.layers.2.self_attn.o_proj.weight': 30.69840431213379, 'model.layers.2.mlp.gate_proj.weight': 15.665666580200195, 'model.layers.2.mlp.down_proj.weight': 30.206314086914062, 'model.layers.2.mlp.up_proj.weight': 1.7222919464111328, 'model.layers.2.input_layernorm.weight': 12.663371086120605, 'model.layers.2.post_attention_layernorm.weight': 17.129716873168945, 'model.layers.3.self_attn.W_pack.weight': 352.04913330078125, 'model.layers.3.self_attn.o_proj.weight': 33.542808532714844, 'model.layers.3.mlp.gate_proj.weight': 18.709897994995117, 'model.layers.3.mlp.down_proj.weight': 115.19551086425781, 'model.layers.3.mlp.up_proj.weight': 26.916963577270508, 'model.layers.3.input_layernorm.weight': 4.112678527832031, 'model.layers.3.post_attention_layernorm.weight': 8.95126724243164, 'model.layers.4.self_attn.W_pack.weight': 310.98419189453125, 'model.layers.4.self_attn.o_proj.weight': 49.46668243408203, 'model.layers.4.mlp.gate_proj.weight': 16.731216430664062, 'model.layers.4.mlp.down_proj.weight': 
20.60802459716797, 'model.layers.4.mlp.up_proj.weight': 24.02362823486328, 'model.layers.4.input_layernorm.weight': 33.72087097167969, 'model.layers.4.post_attention_layernorm.weight': 8.626530647277832, 'model.layers.5.self_attn.W_pack.weight': 373.24517822265625, 'model.layers.5.self_attn.o_proj.weight': 10.94842529296875, 'model.layers.5.mlp.gate_proj.weight': 64.949462890625, 'model.layers.5.mlp.down_proj.weight': 79.34925842285156, 'model.layers.5.mlp.up_proj.weight': 56.05812454223633, 'model.layers.5.input_layernorm.weight': 9.365440368652344, 'model.layers.5.post_attention_layernorm.weight': 7.285011291503906, 'model.layers.6.self_attn.W_pack.weight': 235.14219665527344, 'model.layers.6.self_attn.o_proj.weight': 178.3316192626953, 'model.layers.6.mlp.gate_proj.weight': 59.52509307861328, 'model.layers.6.mlp.down_proj.weight': 151.09246826171875, 'model.layers.6.mlp.up_proj.weight': 36.25011444091797, 'model.layers.6.input_layernorm.weight': 8.410454750061035, 'model.layers.6.post_attention_layernorm.weight': 12.824560165405273, 'model.layers.7.self_attn.W_pack.weight': 43.58650207519531, 'model.layers.7.self_attn.o_proj.weight': 49.27372360229492, 'model.layers.7.mlp.gate_proj.weight': 57.944602966308594, 'model.layers.7.mlp.down_proj.weight': 298.6398620605469, 'model.layers.7.mlp.up_proj.weight': 0.5254945755004883, 'model.layers.7.input_layernorm.weight': 2.499436855316162, 'model.layers.7.post_attention_layernorm.weight': 18.49457550048828, 'model.layers.8.self_attn.W_pack.weight': 48.906715393066406, 'model.layers.8.self_attn.o_proj.weight': 179.1116485595703, 'model.layers.8.mlp.gate_proj.weight': 75.8370132446289, 'model.layers.8.mlp.down_proj.weight': 230.78240966796875, 'model.layers.8.mlp.up_proj.weight': 185.375, 'model.layers.8.input_layernorm.weight': 44.37461853027344, 'model.layers.8.post_attention_layernorm.weight': 10.830987930297852, 'model.layers.9.self_attn.W_pack.weight': 82.59011840820312, 'model.layers.9.self_attn.o_proj.weight': 
269.88671875, 'model.layers.9.mlp.gate_proj.weight': 68.34054565429688, 'model.layers.9.mlp.down_proj.weight': 70.99658203125, 'model.layers.9.mlp.up_proj.weight': 328.7184753417969, 'model.layers.9.input_layernorm.weight': 27.70631217956543, 'model.layers.9.post_attention_layernorm.weight': 27.683637619018555, 'model.layers.10.self_attn.W_pack.weight': 247.97259521484375, 'model.layers.10.self_attn.o_proj.weight': 124.3583755493164, 'model.layers.10.mlp.gate_proj.weight': 33.600921630859375, 'model.layers.10.mlp.down_proj.weight': 221.45025634765625, 'model.layers.10.mlp.up_proj.weight': 157.110595703125, 'model.layers.10.input_layernorm.weight': 7.402688980102539, 'model.layers.10.post_attention_layernorm.weight': 7.2379608154296875, 'model.layers.11.self_attn.W_pack.weight': 225.04556274414062, 'model.layers.11.self_attn.o_proj.weight': 51.29396057128906, 'model.layers.11.mlp.gate_proj.weight': 172.6842803955078, 'model.layers.11.mlp.down_proj.weight': 76.42884826660156, 'model.layers.11.mlp.up_proj.weight': 205.76620483398438, 'model.layers.11.input_layernorm.weight': 2.917478561401367, 'model.layers.11.post_attention_layernorm.weight': 26.838008880615234, 'model.layers.12.self_attn.W_pack.weight': 84.38399505615234, 'model.layers.12.self_attn.o_proj.weight': 203.1007843017578, 'model.layers.12.mlp.gate_proj.weight': 33.74456024169922, 'model.layers.12.mlp.down_proj.weight': 269.46881103515625, 'model.layers.12.mlp.up_proj.weight': 61.78874206542969, 'model.layers.12.input_layernorm.weight': 6.153356075286865, 'model.layers.12.post_attention_layernorm.weight': 37.182159423828125, 'model.layers.13.self_attn.W_pack.weight': 35.592010498046875, 'model.layers.13.self_attn.o_proj.weight': 41.04111862182617, 'model.layers.13.mlp.gate_proj.weight': 30.834117889404297, 'model.layers.13.mlp.down_proj.weight': 127.48301696777344, 'model.layers.13.mlp.up_proj.weight': 46.733009338378906, 'model.layers.13.input_layernorm.weight': 7.034879207611084, 
'model.layers.13.post_attention_layernorm.weight': 20.752338409423828, 'model.layers.14.self_attn.W_pack.weight': 18.493345260620117, 'model.layers.14.self_attn.o_proj.weight': 0.13173294067382812, 'model.layers.14.mlp.gate_proj.weight': 69.10546875, 'model.layers.14.mlp.down_proj.weight': 65.01895904541016, 'model.layers.14.mlp.up_proj.weight': 120.18651580810547, 'model.layers.14.input_layernorm.weight': 56.12666320800781, 'model.layers.14.post_attention_layernorm.weight': 21.302392959594727, 'model.layers.15.self_attn.W_pack.weight': 38.476165771484375, 'model.layers.15.self_attn.o_proj.weight': 5.443084716796875, 'model.layers.15.mlp.gate_proj.weight': 49.23161315917969, 'model.layers.15.mlp.down_proj.weight': 145.4043426513672, 'model.layers.15.mlp.up_proj.weight': 191.48440551757812, 'model.layers.15.input_layernorm.weight': 18.631195068359375, 'model.layers.15.post_attention_layernorm.weight': 13.02981948852539, 'model.layers.16.self_attn.W_pack.weight': 35.01117706298828, 'model.layers.16.self_attn.o_proj.weight': 124.06249237060547, 'model.layers.16.mlp.gate_proj.weight': 29.14255142211914, 'model.layers.16.mlp.down_proj.weight': 34.8109245300293, 'model.layers.16.mlp.up_proj.weight': 49.66072082519531, 'model.layers.16.input_layernorm.weight': 42.008750915527344, 'model.layers.16.post_attention_layernorm.weight': 32.105751037597656, 'model.layers.17.self_attn.W_pack.weight': 245.72438049316406, 'model.layers.17.self_attn.o_proj.weight': 129.53009033203125, 'model.layers.17.mlp.gate_proj.weight': 39.792694091796875, 'model.layers.17.mlp.down_proj.weight': 191.07957458496094, 'model.layers.17.mlp.up_proj.weight': 81.77481079101562, 'model.layers.17.input_layernorm.weight': 15.26624584197998, 'model.layers.17.post_attention_layernorm.weight': 1.299842119216919, 'model.layers.18.self_attn.W_pack.weight': 299.31463623046875, 'model.layers.18.self_attn.o_proj.weight': 71.55064392089844, 'model.layers.18.mlp.gate_proj.weight': 0.9105682373046875, 
'model.layers.18.mlp.down_proj.weight': 58.41600036621094, 'model.layers.18.mlp.up_proj.weight': 119.93107604980469, 'model.layers.18.input_layernorm.weight': 4.7208452224731445, 'model.layers.18.post_attention_layernorm.weight': 2.229631185531616, 'model.layers.19.self_attn.W_pack.weight': 262.5270690917969, 'model.layers.19.self_attn.o_proj.weight': 12.222070693969727, 'model.layers.19.mlp.gate_proj.weight': 94.86344909667969, 'model.layers.19.mlp.down_proj.weight': 126.95451354980469, 'model.layers.19.mlp.up_proj.weight': 101.88811492919922, 'model.layers.19.input_layernorm.weight': 14.57505989074707, 'model.layers.19.post_attention_layernorm.weight': 14.6399564743042, 'model.layers.20.self_attn.W_pack.weight': 75.33004760742188, 'model.layers.20.self_attn.o_proj.weight': 78.65751647949219, 'model.layers.20.mlp.gate_proj.weight': 150.36105346679688, 'model.layers.20.mlp.down_proj.weight': 65.15847778320312, 'model.layers.20.mlp.up_proj.weight': 17.968124389648438, 'model.layers.20.input_layernorm.weight': 16.402969360351562, 'model.layers.20.post_attention_layernorm.weight': 18.42514419555664, 'model.layers.21.self_attn.W_pack.weight': 31.854175567626953, 'model.layers.21.self_attn.o_proj.weight': 36.012596130371094, 'model.layers.21.mlp.gate_proj.weight': 77.72156524658203, 'model.layers.21.mlp.down_proj.weight': 74.58148193359375, 'model.layers.21.mlp.up_proj.weight': 118.97840118408203, 'model.layers.21.input_layernorm.weight': 33.38425064086914, 'model.layers.21.post_attention_layernorm.weight': 11.005084037780762, 'model.layers.22.self_attn.W_pack.weight': 85.3742446899414, 'model.layers.22.self_attn.o_proj.weight': 50.760379791259766, 'model.layers.22.mlp.gate_proj.weight': 177.3429412841797, 'model.layers.22.mlp.down_proj.weight': 23.955474853515625, 'model.layers.22.mlp.up_proj.weight': 96.12214660644531, 'model.layers.22.input_layernorm.weight': 3.831782579421997, 'model.layers.22.post_attention_layernorm.weight': 16.707836151123047, 
'model.layers.23.self_attn.W_pack.weight': 83.10568237304688, 'model.layers.23.self_attn.o_proj.weight': 138.30015563964844, 'model.layers.23.mlp.gate_proj.weight': 69.77306365966797, 'model.layers.23.mlp.down_proj.weight': 162.91802978515625, 'model.layers.23.mlp.up_proj.weight': 74.60517883300781, 'model.layers.23.input_layernorm.weight': 27.26572608947754, 'model.layers.23.post_attention_layernorm.weight': 17.113075256347656, 'model.layers.24.self_attn.W_pack.weight': 83.70370483398438, 'model.layers.24.self_attn.o_proj.weight': 72.5811538696289, 'model.layers.24.mlp.gate_proj.weight': 143.89535522460938, 'model.layers.24.mlp.down_proj.weight': 145.53323364257812, 'model.layers.24.mlp.up_proj.weight': 52.176673889160156, 'model.layers.24.input_layernorm.weight': 9.975500106811523, 'model.layers.24.post_attention_layernorm.weight': 12.579216003417969, 'model.layers.25.self_attn.W_pack.weight': 106.87451171875, 'model.layers.25.self_attn.o_proj.weight': 16.385643005371094, 'model.layers.25.mlp.gate_proj.weight': 95.89348602294922, 'model.layers.25.mlp.down_proj.weight': 7.981260299682617, 'model.layers.25.mlp.up_proj.weight': 26.457712173461914, 'model.layers.25.input_layernorm.weight': 0.8130515813827515, 'model.layers.25.post_attention_layernorm.weight': 1.7665128707885742, 'model.layers.26.self_attn.W_pack.weight': 53.167076110839844, 'model.layers.26.self_attn.o_proj.weight': 1.6349420547485352, 'model.layers.26.mlp.gate_proj.weight': 24.83310317993164, 'model.layers.26.mlp.down_proj.weight': 112.59498596191406, 'model.layers.26.mlp.up_proj.weight': 45.057525634765625, 'model.layers.26.input_layernorm.weight': 21.315677642822266, 'model.layers.26.post_attention_layernorm.weight': 19.305416107177734, 'model.layers.27.self_attn.W_pack.weight': 41.60322952270508, 'model.layers.27.self_attn.o_proj.weight': 60.080299377441406, 'model.layers.27.mlp.gate_proj.weight': 76.72405242919922, 'model.layers.27.mlp.down_proj.weight': 142.64828491210938, 
'model.layers.27.mlp.up_proj.weight': 89.41703033447266, 'model.layers.27.input_layernorm.weight': 27.210872650146484, 'model.layers.27.post_attention_layernorm.weight': 25.707983016967773, 'model.layers.28.self_attn.W_pack.weight': 265.5186767578125, 'model.layers.28.self_attn.o_proj.weight': 28.45724868774414, 'model.layers.28.mlp.gate_proj.weight': 54.76946258544922, 'model.layers.28.mlp.down_proj.weight': 144.5653839111328, 'model.layers.28.mlp.up_proj.weight': 75.11656188964844, 'model.layers.28.input_layernorm.weight': 7.366549491882324, 'model.layers.28.post_attention_layernorm.weight': 28.28396224975586, 'model.layers.29.self_attn.W_pack.weight': 166.41751098632812, 'model.layers.29.self_attn.o_proj.weight': 69.56593322753906, 'model.layers.29.mlp.gate_proj.weight': 218.32382202148438, 'model.layers.29.mlp.down_proj.weight': 156.04196166992188, 'model.layers.29.mlp.up_proj.weight': 48.39146423339844, 'model.layers.29.input_layernorm.weight': 46.71441650390625, 'model.layers.29.post_attention_layernorm.weight': 55.787574768066406, 'model.layers.30.self_attn.W_pack.weight': 112.06259155273438, 'model.layers.30.self_attn.o_proj.weight': 46.64891052246094, 'model.layers.30.mlp.gate_proj.weight': 105.79398345947266, 'model.layers.30.mlp.down_proj.weight': 460, 'model.layers.30.mlp.up_proj.weight': 226.6422576904297, 'model.layers.30.input_layernorm.weight': 80.7092514038086, 'model.layers.30.post_attention_layernorm.weight': 75.2825698852539, 'model.layers.31.self_attn.W_pack.weight': 420, 'model.layers.31.self_attn.o_proj.weight': 440, 'model.layers.31.mlp.gate_proj.weight': 53.389801025390625, 'model.layers.31.mlp.down_proj.weight': 400, 'model.layers.31.mlp.up_proj.weight': 141.4102783203125, 'model.layers.31.input_layernorm.weight': 70.21380615234375, 'model.layers.31.post_attention_layernorm.weight': 3.807844877243042, 'model.norm.weight': 0.12117958068847656, 'lm_head.weight': 2355.181884765625}\n",
    "sensitivity = []*32\n",
    "dict = {}\n",
    "clayer = 0\n",
    "total_weight = []\n",
    "for name, trace in hessian_trace.items():\n",
    "   if name.startswith(\"model.layers\"):\n",
    "       layer = int(name.split('.')[2])\n",
    "       if layer<0 and layer>=32:\n",
    "           continue\n",
    "       if clayer < layer:\n",
    "           clayer = layer\n",
    "           sensitivity.append(dict)\n",
    "           dict = {}\n",
    "       subname = \".\".join(name.split('.')[3:])\n",
    "       if subname.endswith(\".weight\"):\n",
    "           dict[subname[:-7]] = trace\n",
    "           total_weight.append(trace)\n",
    "sensitivity.append(dict)\n",
    "#print(sensitivity, len(sensitivity))\n",
    "total_weight = sorted(total_weight)\n",
    "print(sorted(total_weight), len(total_weight))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bd13d42f-7b4c-40a8-b360-65474250d187",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.026955127716064453, 0.08977508544921875, 0.10369515419006348, 0.13170671463012695, 0.17875725030899048, 0.23795080184936523, 0.2486521601676941, 0.2925376892089844, 0.3602757453918457, 0.36902618408203125, 0.44418221712112427, 0.563406229019165, 0.5701155662536621, 0.5709850788116455, 0.5775845050811768, 0.618068277835846, 0.6504426598548889, 0.6786346435546875, 0.7303394079208374, 0.7759560346603394, 0.7860095500946045, 0.8228187561035156, 0.8661864995956421, 0.869401216506958, 0.9094091653823853, 0.9103370904922485, 1.005544662475586, 1.0265004634857178, 1.038621425628662, 1.0674114227294922, 1.151098370552063, 1.1667490005493164, 1.2580251693725586, 1.272134780883789, 1.299586296081543, 1.3211865425109863, 1.4120287895202637, 1.420072078704834, 1.438662052154541, 1.4855751991271973, 1.5049347877502441, 1.5392000675201416, 1.5559754371643066, 1.557397723197937, 1.612947940826416, 1.7167227268218994, 1.7471524477005005, 1.8519139289855957, 1.9344422817230225, 1.9854681491851807, 2.0408926010131836, 2.0770699977874756, 2.08560848236084, 2.156057357788086, 2.1628215312957764, 2.1733036041259766, 2.173788547515869, 2.2183523178100586, 2.2429981231689453, 2.381711006164551, 2.5193252563476562, 2.5704407691955566, 2.6763761043548584, 2.8043265342712402, 2.822531223297119, 2.846062660217285, 2.922790050506592, 2.9446234703063965, 2.946209192276001, 2.9682884216308594, 2.9729299545288086, 2.989630699157715, 3.031505823135376, 3.127918243408203, 3.144291877746582, 3.2290384769439697, 3.268123149871826, 3.3430747985839844, 3.502904176712036, 3.51692271232605, 3.556614398956299, 3.5581796169281006, 3.5722851753234863, 3.636402130126953, 3.681488275527954, 3.718046188354492, 3.7910373210906982, 3.811335325241089, 3.875288963317871, 3.9071273803710938, 3.9590940475463867, 4.03817892074585, 4.185600757598877, 4.2500410079956055, 4.266239166259766, 4.351631164550781, 4.380916118621826, 4.505777359008789, 4.514029502868652, 4.575880527496338, 4.846256732940674, 
4.980310440063477, 5.008224964141846, 5.047867774963379, 5.1649274826049805, 5.244297981262207, 5.254383087158203, 5.255586624145508, 5.312793731689453, 5.32280969619751, 5.327197074890137, 5.414121627807617, 5.442895889282227, 5.477107048034668, 5.497315406799316, 5.590884685516357, 5.603117942810059, 5.693725109100342, 5.899119853973389, 5.910244941711426, 6.057321071624756, 6.143045425415039, 6.184281826019287, 6.196925163269043, 6.233829498291016, 6.276976585388184, 6.370615005493164, 6.444578170776367, 6.45750617980957, 6.472822189331055, 6.556665420532227, 6.5724992752075195, 6.6037702560424805, 6.6158013343811035, 6.688344478607178, 6.757972717285156, 6.76661491394043, 6.79459285736084, 6.806645393371582, 6.859281539916992, 6.868413925170898, 6.899668216705322, 6.938807487487793, 6.952565670013428, 7.120499610900879, 7.241024017333984, 7.263874530792236, 7.80699348449707, 7.8979291915893555, 8.015764236450195, 8.171716690063477, 8.178665161132812, 8.20296859741211, 8.491037368774414, 8.491867065429688, 8.511298179626465, 8.554145812988281, 8.557249069213867, 8.597291946411133, 8.599519729614258, 8.661575317382812, 8.682978630065918, 8.752952575683594, 8.934866905212402, 8.99644947052002, 9.01853084564209, 9.184150695800781, 9.370157241821289, 9.381475448608398, 9.448954582214355, 9.498764991760254, 9.747720718383789, 9.804179191589355, 10.152000427246094, 10.208890914916992, 10.223437309265137, 10.232771873474121, 10.242692947387695, 10.244279861450195, 10.29954719543457, 10.332550048828125, 10.512006759643555, 10.559869766235352, 10.686168670654297, 10.748720169067383, 10.770637512207031, 11.099241256713867, 11.106266975402832, 11.141361236572266, 11.322927474975586, 11.38267993927002, 11.47945499420166, 11.596566200256348, 11.699947357177734, 12.077723503112793, 12.23161506652832, 12.488154411315918, 12.737123489379883, 12.813602447509766, 13.124128341674805, 13.135514259338379, 13.290608406066895, 13.45847225189209, 13.542013168334961, 13.550643920898438, 
13.707676887512207, 13.714803695678711, 13.811075210571289, 14.069433212280273, 14.207660675048828, 14.365117073059082, 14.931571960449219, 15.485784530639648, 15.748284339904785, 15.76305866241455, 16.195175170898438, 16.39956283569336, 16.40769386291504, 16.860023498535156, 17.189271926879883, 17.343656539916992, 17.35148811340332, 17.469255447387695, 18.17951202392578, 18.28523063659668, 18.32794761657715, 18.92733383178711, 19.40510368347168, 19.635438919067383, 20.122352600097656, 20.425907135009766, 20.474842071533203, 20.53281021118164, 20.736440658569336, 21.53173828125, 21.660659790039062, 21.77808380126953, 22.094173431396484, 22.27927589416504, 22.622234344482422, 22.90517807006836, 22.931411743164062, 23.3612060546875, 23.82508087158203, 23.90243148803711, 23.931339263916016, 24.105525970458984, 24.186344146728516, 24.453598022460938, 24.49260139465332, 24.52692413330078, 24.726085662841797, 25.358905792236328, 25.415205001831055, 25.87903594970703, 25.996774673461914, 27.561439514160156, 27.64804458618164, 27.744571685791016, 27.936511993408203, 27.965742111206055, 28.216276168823242, 28.811073303222656, 29.07036018371582, 29.1208438873291, 29.457656860351562, 29.76224136352539, 30.93912124633789, 30.94979476928711, 31.385406494140625, 31.514244079589844, 31.7966251373291, 31.85358428955078, 32.012237548828125, 32.621429443359375, 33.314842224121094, 33.6893310546875, 33.852622985839844, 34.40975570678711, 34.58034896850586, 34.76072692871094, 34.800193786621094, 34.86315155029297, 34.97863006591797, 35.2567253112793, 35.56208038330078, 35.65726089477539, 35.782020568847656, 35.93079376220703, 37.986053466796875, 38.5163459777832, 38.57417678833008, 39.48937225341797, 39.719139099121094, 39.98316955566406, 40.59729766845703, 40.968505859375, 41.386234283447266, 41.46052551269531, 42.43809127807617, 43.002166748046875, 43.143367767333984, 43.22706985473633, 43.35295867919922, 43.37471008300781, 43.44670104980469, 43.679412841796875, 43.7214469909668, 
44.59624481201172, 45.838008880615234, 46.170135498046875, 47.00330352783203, 47.43613815307617, 47.43947219848633, 47.882354736328125, 48.83298110961914, 49.33600616455078, 49.57487106323242, 50.10617446899414, 50.19719696044922, 50.439903259277344, 52.596717834472656, 53.172977447509766, 53.503387451171875, 54.34690856933594, 55.132442474365234, 56.192169189453125, 57.027591705322266, 57.91218566894531, 58.0107536315918, 59.79261779785156, 60.04924011230469, 61.06828689575195, 61.840152740478516, 62.052486419677734, 62.10081481933594, 64.46712493896484, 73.01953125, 73.6126480102539, 74.53105163574219, 75.22076416015625, 76.23828125, 77.28025817871094, 78.60881042480469, 84.3836898803711, 86.69190216064453, 96.71162414550781, 101.67803955078125, 103.10391998291016, 106.26475524902344, 110.98594665527344, 121.09927368164062, 134.8905029296875, 193.5174560546875, 209.4793243408203, 210.18023681640625, 225.67352294921875, 318.325439453125, 360.467041015625, 1922.208740234375] 360\n"
     ]
    }
   ],
   "source": [
    "### LLaMA-13B weight-level\n",
    "hessian_trace = {'model.embed_tokens.weight': 112.91504669189453, 'model.layers.0.self_attn.q_proj.weight': 2.1628215312957764, 'model.layers.0.self_attn.k_proj.weight': 11.47945499420166, 'model.layers.0.self_attn.v_proj.weight': 0.026955127716064453, 'model.layers.0.self_attn.o_proj.weight': 10.332550048828125, 'model.layers.0.mlp.gate_proj.weight': 32.012237548828125, 'model.layers.0.mlp.up_proj.weight': 10.208890914916992, 'model.layers.0.mlp.down_proj.weight': 14.931571960449219, 'model.layers.0.input_layernorm.weight': 11.106266975402832, 'model.layers.0.post_attention_layernorm.weight': 8.015764236450195, 'model.layers.1.self_attn.q_proj.weight': 13.45847225189209, 'model.layers.1.self_attn.k_proj.weight': 17.469255447387695, 'model.layers.1.self_attn.v_proj.weight': 318.325439453125, 'model.layers.1.self_attn.o_proj.weight': 62.10081481933594, 'model.layers.1.mlp.gate_proj.weight': 8.171716690063477, 'model.layers.1.mlp.up_proj.weight': 27.64804458618164, 'model.layers.1.mlp.down_proj.weight': 77.28025817871094, 'model.layers.1.input_layernorm.weight': 48.83298110961914, 'model.layers.1.post_attention_layernorm.weight': 43.143367767333984, 'model.layers.2.self_attn.q_proj.weight': 9.498764991760254, 'model.layers.2.self_attn.k_proj.weight': 24.726085662841797, 'model.layers.2.self_attn.v_proj.weight': 209.4793243408203, 'model.layers.2.self_attn.o_proj.weight': 3.718046188354492, 'model.layers.2.mlp.gate_proj.weight': 41.46052551269531, 'model.layers.2.mlp.up_proj.weight': 22.622234344482422, 'model.layers.2.mlp.down_proj.weight': 1922.208740234375, 'model.layers.2.input_layernorm.weight': 4.980310440063477, 'model.layers.2.post_attention_layernorm.weight': 55.132442474365234, 'model.layers.3.self_attn.q_proj.weight': 4.505777359008789, 'model.layers.3.self_attn.k_proj.weight': 35.93079376220703, 'model.layers.3.self_attn.v_proj.weight': 46.170135498046875, 'model.layers.3.self_attn.o_proj.weight': 62.052486419677734, 
'model.layers.3.mlp.gate_proj.weight': 20.53281021118164, 'model.layers.3.mlp.up_proj.weight': 29.07036018371582, 'model.layers.3.mlp.down_proj.weight': 50.10617446899414, 'model.layers.3.input_layernorm.weight': 96.71162414550781, 'model.layers.3.post_attention_layernorm.weight': 25.996774673461914, 'model.layers.4.self_attn.q_proj.weight': 37.986053466796875, 'model.layers.4.self_attn.k_proj.weight': 9.184150695800781, 'model.layers.4.self_attn.v_proj.weight': 61.840152740478516, 'model.layers.4.self_attn.o_proj.weight': 43.44670104980469, 'model.layers.4.mlp.gate_proj.weight': 38.5163459777832, 'model.layers.4.mlp.up_proj.weight': 33.6893310546875, 'model.layers.4.mlp.down_proj.weight': 106.26475524902344, 'model.layers.4.input_layernorm.weight': 20.474842071533203, 'model.layers.4.post_attention_layernorm.weight': 21.660659790039062, 'model.layers.5.self_attn.q_proj.weight': 5.497315406799316, 'model.layers.5.self_attn.k_proj.weight': 61.06828689575195, 'model.layers.5.self_attn.v_proj.weight': 134.8905029296875, 'model.layers.5.self_attn.o_proj.weight': 103.10391998291016, 'model.layers.5.mlp.gate_proj.weight': 10.223437309265137, 'model.layers.5.mlp.up_proj.weight': 58.0107536315918, 'model.layers.5.mlp.down_proj.weight': 210.18023681640625, 'model.layers.5.input_layernorm.weight': 50.19719696044922, 'model.layers.5.post_attention_layernorm.weight': 2.5704407691955566, 'model.layers.6.self_attn.q_proj.weight': 5.047867774963379, 'model.layers.6.self_attn.k_proj.weight': 53.503387451171875, 'model.layers.6.self_attn.v_proj.weight': 47.43947219848633, 'model.layers.6.self_attn.o_proj.weight': 39.719139099121094, 'model.layers.6.mlp.gate_proj.weight': 86.69190216064453, 'model.layers.6.mlp.up_proj.weight': 43.37471008300781, 'model.layers.6.mlp.down_proj.weight': 225.67352294921875, 'model.layers.6.input_layernorm.weight': 8.178665161132812, 'model.layers.6.post_attention_layernorm.weight': 31.385406494140625, 'model.layers.7.self_attn.q_proj.weight': 
84.3836898803711, 'model.layers.7.self_attn.k_proj.weight': 8.599519729614258, 'model.layers.7.self_attn.v_proj.weight': 193.5174560546875, 'model.layers.7.self_attn.o_proj.weight': 3.9590940475463867, 'model.layers.7.mlp.gate_proj.weight': 56.192169189453125, 'model.layers.7.mlp.up_proj.weight': 10.232771873474121, 'model.layers.7.mlp.down_proj.weight': 121.09927368164062, 'model.layers.7.input_layernorm.weight': 7.241024017333984, 'model.layers.7.post_attention_layernorm.weight': 17.189271926879883, 'model.layers.8.self_attn.q_proj.weight': 22.27927589416504, 'model.layers.8.self_attn.k_proj.weight': 8.511298179626465, 'model.layers.8.self_attn.v_proj.weight': 2.1733036041259766, 'model.layers.8.self_attn.o_proj.weight': 24.52692413330078, 'model.layers.8.mlp.gate_proj.weight': 39.98316955566406, 'model.layers.8.mlp.up_proj.weight': 6.184281826019287, 'model.layers.8.mlp.down_proj.weight': 33.852622985839844, 'model.layers.8.input_layernorm.weight': 4.266239166259766, 'model.layers.8.post_attention_layernorm.weight': 5.312793731689453, 'model.layers.9.self_attn.q_proj.weight': 10.686168670654297, 'model.layers.9.self_attn.k_proj.weight': 31.85358428955078, 'model.layers.9.self_attn.v_proj.weight': 28.216276168823242, 'model.layers.9.self_attn.o_proj.weight': 78.60881042480469, 'model.layers.9.mlp.gate_proj.weight': 47.43613815307617, 'model.layers.9.mlp.up_proj.weight': 12.077723503112793, 'model.layers.9.mlp.down_proj.weight': 73.6126480102539, 'model.layers.9.input_layernorm.weight': 3.681488275527954, 'model.layers.9.post_attention_layernorm.weight': 13.124128341674805, 'model.layers.10.self_attn.q_proj.weight': 57.91218566894531, 'model.layers.10.self_attn.k_proj.weight': 18.92733383178711, 'model.layers.10.self_attn.v_proj.weight': 73.01953125, 'model.layers.10.self_attn.o_proj.weight': 35.782020568847656, 'model.layers.10.mlp.gate_proj.weight': 19.40510368347168, 'model.layers.10.mlp.up_proj.weight': 47.882354736328125, 
'model.layers.10.mlp.down_proj.weight': 10.242692947387695, 'model.layers.10.input_layernorm.weight': 10.512006759643555, 'model.layers.10.post_attention_layernorm.weight': 6.233829498291016, 'model.layers.11.self_attn.q_proj.weight': 13.542013168334961, 'model.layers.11.self_attn.k_proj.weight': 34.800193786621094, 'model.layers.11.self_attn.v_proj.weight': 75.22076416015625, 'model.layers.11.self_attn.o_proj.weight': 110.98594665527344, 'model.layers.11.mlp.gate_proj.weight': 57.027591705322266, 'model.layers.11.mlp.up_proj.weight': 19.635438919067383, 'model.layers.11.mlp.down_proj.weight': 60.04924011230469, 'model.layers.11.input_layernorm.weight': 2.2429981231689453, 'model.layers.11.post_attention_layernorm.weight': 1.612947940826416, 'model.layers.12.self_attn.q_proj.weight': 8.661575317382812, 'model.layers.12.self_attn.k_proj.weight': 24.105525970458984, 'model.layers.12.self_attn.v_proj.weight': 32.621429443359375, 'model.layers.12.self_attn.o_proj.weight': 6.45750617980957, 'model.layers.12.mlp.gate_proj.weight': 27.936511993408203, 'model.layers.12.mlp.up_proj.weight': 5.414121627807617, 'model.layers.12.mlp.down_proj.weight': 27.561439514160156, 'model.layers.12.input_layernorm.weight': 2.5193252563476562, 'model.layers.12.post_attention_layernorm.weight': 3.875288963317871, 'model.layers.13.self_attn.q_proj.weight': 22.90517807006836, 'model.layers.13.self_attn.k_proj.weight': 25.415205001831055, 'model.layers.13.self_attn.v_proj.weight': 45.838008880615234, 'model.layers.13.self_attn.o_proj.weight': 25.87903594970703, 'model.layers.13.mlp.gate_proj.weight': 9.804179191589355, 'model.layers.13.mlp.up_proj.weight': 43.002166748046875, 'model.layers.13.mlp.down_proj.weight': 41.386234283447266, 'model.layers.13.input_layernorm.weight': 2.6763761043548584, 'model.layers.13.post_attention_layernorm.weight': 8.554145812988281, 'model.layers.14.self_attn.q_proj.weight': 5.910244941711426, 'model.layers.14.self_attn.k_proj.weight': 6.757972717285156, 
'model.layers.14.self_attn.v_proj.weight': 15.748284339904785, 'model.layers.14.self_attn.o_proj.weight': 74.53105163574219, 'model.layers.14.mlp.gate_proj.weight': 10.152000427246094, 'model.layers.14.mlp.up_proj.weight': 14.069433212280273, 'model.layers.14.mlp.down_proj.weight': 49.33600616455078, 'model.layers.14.input_layernorm.weight': 1.7167227268218994, 'model.layers.14.post_attention_layernorm.weight': 0.3602757453918457, 'model.layers.15.self_attn.q_proj.weight': 6.938807487487793, 'model.layers.15.self_attn.k_proj.weight': 0.5701155662536621, 'model.layers.15.self_attn.v_proj.weight': 6.370615005493164, 'model.layers.15.self_attn.o_proj.weight': 1.272134780883789, 'model.layers.15.mlp.gate_proj.weight': 43.679412841796875, 'model.layers.15.mlp.up_proj.weight': 5.254383087158203, 'model.layers.15.mlp.down_proj.weight': 34.97863006591797, 'model.layers.15.input_layernorm.weight': 2.989630699157715, 'model.layers.15.post_attention_layernorm.weight': 0.5775845050811768, 'model.layers.16.self_attn.q_proj.weight': 18.28523063659668, 'model.layers.16.self_attn.k_proj.weight': 12.488154411315918, 'model.layers.16.self_attn.v_proj.weight': 39.48937225341797, 'model.layers.16.self_attn.o_proj.weight': 31.514244079589844, 'model.layers.16.mlp.gate_proj.weight': 4.351631164550781, 'model.layers.16.mlp.up_proj.weight': 5.693725109100342, 'model.layers.16.mlp.down_proj.weight': 40.59729766845703, 'model.layers.16.input_layernorm.weight': 1.420072078704834, 'model.layers.16.post_attention_layernorm.weight': 4.575880527496338, 'model.layers.17.self_attn.q_proj.weight': 1.005544662475586, 'model.layers.17.self_attn.k_proj.weight': 35.2567253112793, 'model.layers.17.self_attn.v_proj.weight': 28.811073303222656, 'model.layers.17.self_attn.o_proj.weight': 20.425907135009766, 'model.layers.17.mlp.gate_proj.weight': 1.2580251693725586, 'model.layers.17.mlp.up_proj.weight': 0.6786346435546875, 'model.layers.17.mlp.down_proj.weight': 54.34690856933594, 
'model.layers.17.input_layernorm.weight': 3.268123149871826, 'model.layers.17.post_attention_layernorm.weight': 2.9729299545288086, 'model.layers.18.self_attn.q_proj.weight': 5.477107048034668, 'model.layers.18.self_attn.k_proj.weight': 1.4855751991271973, 'model.layers.18.self_attn.v_proj.weight': 6.806645393371582, 'model.layers.18.self_attn.o_proj.weight': 30.93912124633789, 'model.layers.18.mlp.gate_proj.weight': 23.3612060546875, 'model.layers.18.mlp.up_proj.weight': 10.244279861450195, 'model.layers.18.mlp.down_proj.weight': 43.35295867919922, 'model.layers.18.input_layernorm.weight': 3.7910373210906982, 'model.layers.18.post_attention_layernorm.weight': 6.899668216705322, 'model.layers.19.self_attn.q_proj.weight': 11.141361236572266, 'model.layers.19.self_attn.k_proj.weight': 0.563406229019165, 'model.layers.19.self_attn.v_proj.weight': 16.39956283569336, 'model.layers.19.self_attn.o_proj.weight': 3.811335325241089, 'model.layers.19.mlp.gate_proj.weight': 13.135514259338379, 'model.layers.19.mlp.up_proj.weight': 42.43809127807617, 'model.layers.19.mlp.down_proj.weight': 64.46712493896484, 'model.layers.19.input_layernorm.weight': 3.3430747985839844, 'model.layers.19.post_attention_layernorm.weight': 5.590884685516357, 'model.layers.20.self_attn.q_proj.weight': 4.185600757598877, 'model.layers.20.self_attn.k_proj.weight': 6.444578170776367, 'model.layers.20.self_attn.v_proj.weight': 59.79261779785156, 'model.layers.20.self_attn.o_proj.weight': 6.556665420532227, 'model.layers.20.mlp.gate_proj.weight': 29.457656860351562, 'model.layers.20.mlp.up_proj.weight': 13.811075210571289, 'model.layers.20.mlp.down_proj.weight': 11.322927474975586, 'model.layers.20.input_layernorm.weight': 7.120499610900879, 'model.layers.20.post_attention_layernorm.weight': 12.737123489379883, 'model.layers.21.self_attn.q_proj.weight': 12.813602447509766, 'model.layers.21.self_attn.k_proj.weight': 6.472822189331055, 'model.layers.21.self_attn.v_proj.weight': 31.7966251373291, 
'model.layers.21.self_attn.o_proj.weight': 40.968505859375, 'model.layers.21.mlp.gate_proj.weight': 23.82508087158203, 'model.layers.21.mlp.up_proj.weight': 21.53173828125, 'model.layers.21.mlp.down_proj.weight': 24.49260139465332, 'model.layers.21.input_layernorm.weight': 1.151098370552063, 'model.layers.21.post_attention_layernorm.weight': 13.290608406066895, 'model.layers.22.self_attn.q_proj.weight': 10.748720169067383, 'model.layers.22.self_attn.k_proj.weight': 6.76661491394043, 'model.layers.22.self_attn.v_proj.weight': 13.550643920898438, 'model.layers.22.self_attn.o_proj.weight': 11.699947357177734, 'model.layers.22.mlp.gate_proj.weight': 20.122352600097656, 'model.layers.22.mlp.up_proj.weight': 3.5722851753234863, 'model.layers.22.mlp.down_proj.weight': 38.57417678833008, 'model.layers.22.input_layernorm.weight': 4.846256732940674, 'model.layers.22.post_attention_layernorm.weight': 16.195175170898438, 'model.layers.23.self_attn.q_proj.weight': 8.20296859741211, 'model.layers.23.self_attn.k_proj.weight': 1.8519139289855957, 'model.layers.23.self_attn.v_proj.weight': 21.77808380126953, 'model.layers.23.self_attn.o_proj.weight': 6.688344478607178, 'model.layers.23.mlp.gate_proj.weight': 16.860023498535156, 'model.layers.23.mlp.up_proj.weight': 18.17951202392578, 'model.layers.23.mlp.down_proj.weight': 47.00330352783203, 'model.layers.23.input_layernorm.weight': 0.6504426598548889, 'model.layers.23.post_attention_layernorm.weight': 9.01853084564209, 'model.layers.24.self_attn.q_proj.weight': 5.603117942810059, 'model.layers.24.self_attn.k_proj.weight': 2.2183523178100586, 'model.layers.24.self_attn.v_proj.weight': 33.314842224121094, 'model.layers.24.self_attn.o_proj.weight': 2.946209192276001, 'model.layers.24.mlp.gate_proj.weight': 8.99644947052002, 'model.layers.24.mlp.up_proj.weight': 0.23795080184936523, 'model.layers.24.mlp.down_proj.weight': 15.76305866241455, 'model.layers.24.input_layernorm.weight': 0.2486521601676941, 
'model.layers.24.post_attention_layernorm.weight': 6.057321071624756, 'model.layers.25.self_attn.q_proj.weight': 3.144291877746582, 'model.layers.25.self_attn.k_proj.weight': 6.196925163269043, 'model.layers.25.self_attn.v_proj.weight': 3.5581796169281006, 'model.layers.25.self_attn.o_proj.weight': 2.0408926010131836, 'model.layers.25.mlp.gate_proj.weight': 30.94979476928711, 'model.layers.25.mlp.up_proj.weight': 9.370157241821289, 'model.layers.25.mlp.down_proj.weight': 50.439903259277344, 'model.layers.25.input_layernorm.weight': 1.4120287895202637, 'model.layers.25.post_attention_layernorm.weight': 2.173788547515869, 'model.layers.26.self_attn.q_proj.weight': 3.127918243408203, 'model.layers.26.self_attn.k_proj.weight': 0.13170671463012695, 'model.layers.26.self_attn.v_proj.weight': 4.514029502868652, 'model.layers.26.self_attn.o_proj.weight': 11.099241256713867, 'model.layers.26.mlp.gate_proj.weight': 34.76072692871094, 'model.layers.26.mlp.up_proj.weight': 10.770637512207031, 'model.layers.26.mlp.down_proj.weight': 43.22706985473633, 'model.layers.26.input_layernorm.weight': 0.44418221712112427, 'model.layers.26.post_attention_layernorm.weight': 3.502904176712036, 'model.layers.27.self_attn.q_proj.weight': 0.9103370904922485, 'model.layers.27.self_attn.k_proj.weight': 7.263874530792236, 'model.layers.27.self_attn.v_proj.weight': 0.8228187561035156, 'model.layers.27.self_attn.o_proj.weight': 0.08977508544921875, 'model.layers.27.mlp.gate_proj.weight': 25.358905792236328, 'model.layers.27.mlp.up_proj.weight': 6.6037702560424805, 'model.layers.27.mlp.down_proj.weight': 23.90243148803711, 'model.layers.27.input_layernorm.weight': 0.5709850788116455, 'model.layers.27.post_attention_layernorm.weight': 2.156057357788086, 'model.layers.28.self_attn.q_proj.weight': 0.10369515419006348, 'model.layers.28.self_attn.k_proj.weight': 0.7759560346603394, 'model.layers.28.self_attn.v_proj.weight': 4.380916118621826, 'model.layers.28.self_attn.o_proj.weight': 
0.7860095500946045, 'model.layers.28.mlp.gate_proj.weight': 13.714803695678711, 'model.layers.28.mlp.up_proj.weight': 9.747720718383789, 'model.layers.28.mlp.down_proj.weight': 43.7214469909668, 'model.layers.28.input_layernorm.weight': 0.2925376892089844, 'model.layers.28.post_attention_layernorm.weight': 1.5559754371643066, 'model.layers.29.self_attn.q_proj.weight': 3.2290384769439697, 'model.layers.29.self_attn.k_proj.weight': 5.244297981262207, 'model.layers.29.self_attn.v_proj.weight': 6.143045425415039, 'model.layers.29.self_attn.o_proj.weight': 16.40769386291504, 'model.layers.29.mlp.gate_proj.weight': 5.008224964141846, 'model.layers.29.mlp.up_proj.weight': 10.559869766235352, 'model.layers.29.mlp.down_proj.weight': 6.859281539916992, 'model.layers.29.input_layernorm.weight': 1.3211865425109863, 'model.layers.29.post_attention_layernorm.weight': 2.8043265342712402, 'model.layers.30.self_attn.q_proj.weight': 8.491867065429688, 'model.layers.30.self_attn.k_proj.weight': 4.03817892074585, 'model.layers.30.self_attn.v_proj.weight': 6.276976585388184, 'model.layers.30.self_attn.o_proj.weight': 2.08560848236084, 'model.layers.30.mlp.gate_proj.weight': 27.744571685791016, 'model.layers.30.mlp.up_proj.weight': 5.1649274826049805, 'model.layers.30.mlp.down_proj.weight': 14.365117073059082, 'model.layers.30.input_layernorm.weight': 0.9094091653823853, 'model.layers.30.post_attention_layernorm.weight': 1.0265004634857178, 'model.layers.31.self_attn.q_proj.weight': 1.5392000675201416, 'model.layers.31.self_attn.k_proj.weight': 6.868413925170898, 'model.layers.31.self_attn.v_proj.weight': 12.23161506652832, 'model.layers.31.self_attn.o_proj.weight': 17.343656539916992, 'model.layers.31.mlp.gate_proj.weight': 3.636402130126953, 'model.layers.31.mlp.up_proj.weight': 2.9446234703063965, 'model.layers.31.mlp.down_proj.weight': 5.442895889282227, 'model.layers.31.input_layernorm.weight': 1.7471524477005005, 'model.layers.31.post_attention_layernorm.weight': 
1.1667490005493164, 'model.layers.32.self_attn.q_proj.weight': 6.952565670013428, 'model.layers.32.self_attn.k_proj.weight': 5.255586624145508, 'model.layers.32.self_attn.v_proj.weight': 7.8979291915893555, 'model.layers.32.self_attn.o_proj.weight': 2.0770699977874756, 'model.layers.32.mlp.gate_proj.weight': 35.65726089477539, 'model.layers.32.mlp.up_proj.weight': 4.2500410079956055, 'model.layers.32.mlp.down_proj.weight': 34.40975570678711, 'model.layers.32.input_layernorm.weight': 0.618068277835846, 'model.layers.32.post_attention_layernorm.weight': 1.5049347877502441, 'model.layers.33.self_attn.q_proj.weight': 1.9344422817230225, 'model.layers.33.self_attn.k_proj.weight': 5.899119853973389, 'model.layers.33.self_attn.v_proj.weight': 10.29954719543457, 'model.layers.33.self_attn.o_proj.weight': 17.35148811340332, 'model.layers.33.mlp.gate_proj.weight': 53.172977447509766, 'model.layers.33.mlp.up_proj.weight': 52.596717834472656, 'model.layers.33.mlp.down_proj.weight': 35.56208038330078, 'model.layers.33.input_layernorm.weight': 1.557397723197937, 'model.layers.33.post_attention_layernorm.weight': 2.381711006164551, 'model.layers.34.self_attn.q_proj.weight': 7.80699348449707, 'model.layers.34.self_attn.k_proj.weight': 15.485784530639648, 'model.layers.34.self_attn.v_proj.weight': 23.931339263916016, 'model.layers.34.self_attn.o_proj.weight': 9.381475448608398, 'model.layers.34.mlp.gate_proj.weight': 29.76224136352539, 'model.layers.34.mlp.up_proj.weight': 49.57487106323242, 'model.layers.34.mlp.down_proj.weight': 8.597291946411133, 'model.layers.34.input_layernorm.weight': 2.846062660217285, 'model.layers.34.post_attention_layernorm.weight': 3.556614398956299, 'model.layers.35.self_attn.q_proj.weight': 22.094173431396484, 'model.layers.35.self_attn.k_proj.weight': 1.0674114227294922, 'model.layers.35.self_attn.v_proj.weight': 11.38267993927002, 'model.layers.35.self_attn.o_proj.weight': 8.934866905212402, 'model.layers.35.mlp.gate_proj.weight': 24.453598022460938, 
'model.layers.35.mlp.up_proj.weight': 6.79459285736084, 'model.layers.35.mlp.down_proj.weight': 8.752952575683594, 'model.layers.35.input_layernorm.weight': 3.9071273803710938, 'model.layers.35.post_attention_layernorm.weight': 5.32280969619751, 'model.layers.36.self_attn.q_proj.weight': 1.438662052154541, 'model.layers.36.self_attn.k_proj.weight': 0.869401216506958, 'model.layers.36.self_attn.v_proj.weight': 0.7303394079208374, 'model.layers.36.self_attn.o_proj.weight': 9.448954582214355, 'model.layers.36.mlp.gate_proj.weight': 13.707676887512207, 'model.layers.36.mlp.up_proj.weight': 44.59624481201172, 'model.layers.36.mlp.down_proj.weight': 29.1208438873291, 'model.layers.36.input_layernorm.weight': 1.299586296081543, 'model.layers.36.post_attention_layernorm.weight': 2.9682884216308594, 'model.layers.37.self_attn.q_proj.weight': 2.822531223297119, 'model.layers.37.self_attn.k_proj.weight': 8.557249069213867, 'model.layers.37.self_attn.v_proj.weight': 1.038621425628662, 'model.layers.37.self_attn.o_proj.weight': 22.931411743164062, 'model.layers.37.mlp.gate_proj.weight': 2.922790050506592, 'model.layers.37.mlp.up_proj.weight': 24.186344146728516, 'model.layers.37.mlp.down_proj.weight': 18.32794761657715, 'model.layers.37.input_layernorm.weight': 3.51692271232605, 'model.layers.37.post_attention_layernorm.weight': 101.67803955078125, 'model.layers.38.self_attn.q_proj.weight': 11.596566200256348, 'model.layers.38.self_attn.k_proj.weight': 8.682978630065918, 'model.layers.38.self_attn.v_proj.weight': 6.6158013343811035, 'model.layers.38.self_attn.o_proj.weight': 1.9854681491851807, 'model.layers.38.mlp.gate_proj.weight': 27.965742111206055, 'model.layers.38.mlp.up_proj.weight': 8.491037368774414, 'model.layers.38.mlp.down_proj.weight': 34.86315155029297, 'model.layers.38.input_layernorm.weight': 34.58034896850586, 'model.layers.38.post_attention_layernorm.weight': 0.17875725030899048, 'model.layers.39.self_attn.q_proj.weight': 0.36902618408203125, 
'model.layers.39.self_attn.k_proj.weight': 3.031505823135376, 'model.layers.39.self_attn.v_proj.weight': 14.207660675048828, 'model.layers.39.self_attn.o_proj.weight': 5.327197074890137, 'model.layers.39.mlp.gate_proj.weight': 20.736440658569336, 'model.layers.39.mlp.up_proj.weight': 76.23828125, 'model.layers.39.mlp.down_proj.weight': 360.467041015625, 'model.layers.39.input_layernorm.weight': 0.8661864995956421, 'model.layers.39.post_attention_layernorm.weight': 6.5724992752075195, 'model.norm.weight': 0.11281043291091919, 'lm_head.weight': 3075.12451171875}\n",
    "# Weight-level sensitivity: bucket the Hessian trace of every `model.layers.N.*.weight`\n",
    "# entry by its layer index N, and collect all of those traces into one sorted list.\n",
    "NUM_LAYERS = 40  # the keys of this hessian_trace end at model.layers.39\n",
    "# sensitivity[N] maps a module path (e.g. 'self_attn.q_proj') to its trace for layer N.\n",
    "# Built with a comprehension because `[]*40` is just `[]` and pre-allocates nothing.\n",
    "sensitivity = [{} for _ in range(NUM_LAYERS)]\n",
    "total_weight = []\n",
    "for name, trace in hessian_trace.items():\n",
    "    if not name.startswith(\"model.layers\"):\n",
    "        continue  # e.g. model.norm.weight / lm_head.weight carry no layer index\n",
    "    parts = name.split('.')\n",
    "    layer = int(parts[2])\n",
    "    # Real range check; a test of the form `layer<0 and layer>=32` can never be true.\n",
    "    if not 0 <= layer < NUM_LAYERS:\n",
    "        continue\n",
    "    subname = \".\".join(parts[3:])\n",
    "    if subname.endswith(\".weight\"):\n",
    "        # Index by layer directly: no running counter that depends on key order, and no\n",
    "        # temporary named `dict` that would shadow the builtin for the rest of the kernel.\n",
    "        sensitivity[layer][subname[:-len(\".weight\")]] = trace\n",
    "        total_weight.append(trace)\n",
    "total_weight = sorted(total_weight)  # ascending; already sorted for the print below\n",
    "print(total_weight, len(total_weight))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1a8af65f-fe33-44bc-bd19-84342898eac9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1010.9604296684265, 1442.3661541938782, 676.8871879577637, 1186.9910202026367, 2113.5190238952637, 1068.6591396331787, 1222.0463027954102, 1254.5190863609314, 1266.7879333496094, 1032.6756763458252, 932.8781423568726, 743.8002490997314, 488.7700762748718, 346.7903344631195, 334.73515796661377, 474.5761580467224, 376.0424098968506, 467.9197998046875, 396.27048802375793, 365.2723865509033, 346.0572657585144, 333.85840106010437, 331.09354615211487, 262.139422416687, 189.15912878513336, 316.5293133854866, 250.72403156757355, 264.4295563697815, 135.62281602621078, 148.91780042648315, 765.4397902488708, 775.6701593399048]\n"
     ]
    }
   ],
   "source": [
    "## LLaMA2-7B layer-level\n",
    "hessian_trace = {'model.embed_tokens.weight': 465.8458251953125, 'model.layers.0.self_attn.q_proj.weight': 4.1177802085876465, 'model.layers.0.self_attn.k_proj.weight': 18.64617156982422, 'model.layers.0.self_attn.v_proj.weight': 155.2868194580078, 'model.layers.0.self_attn.o_proj.weight': 289.9363098144531, 'model.layers.0.mlp.gate_proj.weight': 58.77898025512695, 'model.layers.0.mlp.up_proj.weight': 103.98562622070312, 'model.layers.0.mlp.down_proj.weight': 303.13995361328125, 'model.layers.0.input_layernorm.weight': 27.68348503112793, 'model.layers.0.post_attention_layernorm.weight': 49.38530349731445, 'model.layers.1.self_attn.q_proj.weight': 32.866249084472656, 'model.layers.1.self_attn.k_proj.weight': 39.01860809326172, 'model.layers.1.self_attn.v_proj.weight': 500, 'model.layers.1.self_attn.o_proj.weight': 127.69908142089844, 'model.layers.1.mlp.gate_proj.weight': 181.39236450195312, 'model.layers.1.mlp.up_proj.weight': 1.8457183837890625, 'model.layers.1.mlp.down_proj.weight': 540, 'model.layers.1.input_layernorm.weight': 13.500053405761719, 'model.layers.1.post_attention_layernorm.weight': 6.044079303741455, 'model.layers.2.self_attn.q_proj.weight': 67.72550964355469, 'model.layers.2.self_attn.k_proj.weight': 116.80323028564453, 'model.layers.2.self_attn.v_proj.weight': 15.893417358398438, 'model.layers.2.self_attn.o_proj.weight': 37.74858474731445, 'model.layers.2.mlp.gate_proj.weight': 131.64105224609375, 'model.layers.2.mlp.up_proj.weight': 123.28805541992188, 'model.layers.2.mlp.down_proj.weight': 53.77529525756836, 'model.layers.2.input_layernorm.weight': 3.2302589416503906, 'model.layers.2.post_attention_layernorm.weight': 126.78178405761719, 'model.layers.3.self_attn.q_proj.weight': 67.09822845458984, 'model.layers.3.self_attn.k_proj.weight': 69.13533782958984, 'model.layers.3.self_attn.v_proj.weight': 181.9348907470703, 'model.layers.3.self_attn.o_proj.weight': 228.10691833496094, 'model.layers.3.mlp.gate_proj.weight': 49.34397506713867, 
'model.layers.3.mlp.up_proj.weight': 278.3873596191406, 'model.layers.3.mlp.down_proj.weight': 229.63778686523438, 'model.layers.3.input_layernorm.weight': 50.72489929199219, 'model.layers.3.post_attention_layernorm.weight': 32.62162399291992, 'model.layers.4.self_attn.q_proj.weight': 104.9714584350586, 'model.layers.4.self_attn.k_proj.weight': 84.40615844726562, 'model.layers.4.self_attn.v_proj.weight': 520, 'model.layers.4.self_attn.o_proj.weight': 268.5078430175781, 'model.layers.4.mlp.gate_proj.weight': 245.79966735839844, 'model.layers.4.mlp.up_proj.weight': 348.9795837402344, 'model.layers.4.mlp.down_proj.weight': 424.47705078125, 'model.layers.4.input_layernorm.weight': 101.28925323486328, 'model.layers.4.post_attention_layernorm.weight': 15.088008880615234, 'model.layers.5.self_attn.q_proj.weight': 103.09130096435547, 'model.layers.5.self_attn.k_proj.weight': 81.28451538085938, 'model.layers.5.self_attn.v_proj.weight': 28.6591796875, 'model.layers.5.self_attn.o_proj.weight': 155.21942138671875, 'model.layers.5.mlp.gate_proj.weight': 140.7686767578125, 'model.layers.5.mlp.up_proj.weight': 166.62733459472656, 'model.layers.5.mlp.down_proj.weight': 347.4410095214844, 'model.layers.5.input_layernorm.weight': 44.20230484008789, 'model.layers.5.post_attention_layernorm.weight': 1.365396499633789, 'model.layers.6.self_attn.q_proj.weight': 22.674442291259766, 'model.layers.6.self_attn.k_proj.weight': 117.87711334228516, 'model.layers.6.self_attn.v_proj.weight': 341.9588317871094, 'model.layers.6.self_attn.o_proj.weight': 271.67608642578125, 'model.layers.6.mlp.gate_proj.weight': 109.19419860839844, 'model.layers.6.mlp.up_proj.weight': 261.23052978515625, 'model.layers.6.mlp.down_proj.weight': 58.874732971191406, 'model.layers.6.input_layernorm.weight': 36.10131072998047, 'model.layers.6.post_attention_layernorm.weight': 2.459056854248047, 'model.layers.7.self_attn.q_proj.weight': 47.576271057128906, 'model.layers.7.self_attn.k_proj.weight': 165.8643341064453, 
'model.layers.7.self_attn.v_proj.weight': 8.740921020507812, 'model.layers.7.self_attn.o_proj.weight': 192.35586547851562, 'model.layers.7.mlp.gate_proj.weight': 153.43853759765625, 'model.layers.7.mlp.up_proj.weight': 254.81936645507812, 'model.layers.7.mlp.down_proj.weight': 427.9616394042969, 'model.layers.7.input_layernorm.weight': 2.663360118865967, 'model.layers.7.post_attention_layernorm.weight': 1.0987911224365234, 'model.layers.8.self_attn.q_proj.weight': 37.674251556396484, 'model.layers.8.self_attn.k_proj.weight': 107.02897644042969, 'model.layers.8.self_attn.v_proj.weight': 194.10565185546875, 'model.layers.8.self_attn.o_proj.weight': 414.0716857910156, 'model.layers.8.mlp.gate_proj.weight': 121.18950653076172, 'model.layers.8.mlp.up_proj.weight': 323.1715393066406, 'model.layers.8.mlp.down_proj.weight': 23.10235023498535, 'model.layers.8.input_layernorm.weight': 25.022708892822266, 'model.layers.8.post_attention_layernorm.weight': 21.421262741088867, 'model.layers.9.self_attn.q_proj.weight': 26.27182388305664, 'model.layers.9.self_attn.k_proj.weight': 91.70826721191406, 'model.layers.9.self_attn.v_proj.weight': 192.45782470703125, 'model.layers.9.self_attn.o_proj.weight': 206.39627075195312, 'model.layers.9.mlp.gate_proj.weight': 53.91792297363281, 'model.layers.9.mlp.up_proj.weight': 260.1705322265625, 'model.layers.9.mlp.down_proj.weight': 178.96304321289062, 'model.layers.9.input_layernorm.weight': 16.319766998291016, 'model.layers.9.post_attention_layernorm.weight': 6.470224380493164, 'model.layers.10.self_attn.q_proj.weight': 15.849406242370605, 'model.layers.10.self_attn.k_proj.weight': 111.20803833007812, 'model.layers.10.self_attn.v_proj.weight': 440, 'model.layers.10.self_attn.o_proj.weight': 34.95982360839844, 'model.layers.10.mlp.gate_proj.weight': 62.91210174560547, 'model.layers.10.mlp.up_proj.weight': 252.79269409179688, 'model.layers.10.mlp.down_proj.weight': 0.7781143188476562, 'model.layers.10.input_layernorm.weight': 4.09343147277832, 
'model.layers.10.post_attention_layernorm.weight': 10.28453254699707, 'model.layers.11.self_attn.q_proj.weight': 104.68980407714844, 'model.layers.11.self_attn.k_proj.weight': 80.12242126464844, 'model.layers.11.self_attn.v_proj.weight': 254.91937255859375, 'model.layers.11.self_attn.o_proj.weight': 149.952880859375, 'model.layers.11.mlp.gate_proj.weight': 16.357452392578125, 'model.layers.11.mlp.up_proj.weight': 111.54994201660156, 'model.layers.11.mlp.down_proj.weight': 2.8896121978759766, 'model.layers.11.input_layernorm.weight': 9.309587478637695, 'model.layers.11.post_attention_layernorm.weight': 14.009176254272461, 'model.layers.12.self_attn.q_proj.weight': 10.744429588317871, 'model.layers.12.self_attn.k_proj.weight': 87.85879516601562, 'model.layers.12.self_attn.v_proj.weight': 222.95887756347656, 'model.layers.12.self_attn.o_proj.weight': 54.11162567138672, 'model.layers.12.mlp.gate_proj.weight': 48.817787170410156, 'model.layers.12.mlp.up_proj.weight': 6.750141143798828, 'model.layers.12.mlp.down_proj.weight': 48.50048065185547, 'model.layers.12.input_layernorm.weight': 4.535086154937744, 'model.layers.12.post_attention_layernorm.weight': 4.492853164672852, 'model.layers.13.self_attn.q_proj.weight': 30.856658935546875, 'model.layers.13.self_attn.k_proj.weight': 56.81590270996094, 'model.layers.13.self_attn.v_proj.weight': 36.241607666015625, 'model.layers.13.self_attn.o_proj.weight': 112.63178253173828, 'model.layers.13.mlp.gate_proj.weight': 2.962164878845215, 'model.layers.13.mlp.up_proj.weight': 54.40440368652344, 'model.layers.13.mlp.down_proj.weight': 41.355377197265625, 'model.layers.13.input_layernorm.weight': 9.033424377441406, 'model.layers.13.post_attention_layernorm.weight': 2.4890124797821045, 'model.layers.14.self_attn.q_proj.weight': 22.63298988342285, 'model.layers.14.self_attn.k_proj.weight': 30.87592315673828, 'model.layers.14.self_attn.v_proj.weight': 99.5544662475586, 'model.layers.14.self_attn.o_proj.weight': 103.61449432373047, 
'model.layers.14.mlp.gate_proj.weight': 9.113536834716797, 'model.layers.14.mlp.up_proj.weight': 40.54330062866211, 'model.layers.14.mlp.down_proj.weight': 10.268321990966797, 'model.layers.14.input_layernorm.weight': 8.420675277709961, 'model.layers.14.post_attention_layernorm.weight': 9.71144962310791, 'model.layers.15.self_attn.q_proj.weight': 14.43136978149414, 'model.layers.15.self_attn.k_proj.weight': 52.20471954345703, 'model.layers.15.self_attn.v_proj.weight': 93.446044921875, 'model.layers.15.self_attn.o_proj.weight': 147.3588104248047, 'model.layers.15.mlp.gate_proj.weight': 3.7850570678710938, 'model.layers.15.mlp.up_proj.weight': 59.45077896118164, 'model.layers.15.mlp.down_proj.weight': 93.62474060058594, 'model.layers.15.input_layernorm.weight': 5.225710391998291, 'model.layers.15.post_attention_layernorm.weight': 5.04892635345459, 'model.layers.16.self_attn.q_proj.weight': 13.323066711425781, 'model.layers.16.self_attn.k_proj.weight': 17.881431579589844, 'model.layers.16.self_attn.v_proj.weight': 110.5497055053711, 'model.layers.16.self_attn.o_proj.weight': 72.61431884765625, 'model.layers.16.mlp.gate_proj.weight': 24.68994140625, 'model.layers.16.mlp.up_proj.weight': 54.579917907714844, 'model.layers.16.mlp.down_proj.weight': 71.73147583007812, 'model.layers.16.input_layernorm.weight': 4.927809715270996, 'model.layers.16.post_attention_layernorm.weight': 5.744742393493652, 'model.layers.17.self_attn.q_proj.weight': 54.01586151123047, 'model.layers.17.self_attn.k_proj.weight': 36.995384216308594, 'model.layers.17.self_attn.v_proj.weight': 120.51625061035156, 'model.layers.17.self_attn.o_proj.weight': 7.629405975341797, 'model.layers.17.mlp.gate_proj.weight': 57.606201171875, 'model.layers.17.mlp.up_proj.weight': 90.13648986816406, 'model.layers.17.mlp.down_proj.weight': 89.73792266845703, 'model.layers.17.input_layernorm.weight': 9.565960884094238, 'model.layers.17.post_attention_layernorm.weight': 1.716322898864746, 
'model.layers.18.self_attn.q_proj.weight': 37.11637878417969, 'model.layers.18.self_attn.k_proj.weight': 17.056533813476562, 'model.layers.18.self_attn.v_proj.weight': 33.5388298034668, 'model.layers.18.self_attn.o_proj.weight': 54.345890045166016, 'model.layers.18.mlp.gate_proj.weight': 63.99528884887695, 'model.layers.18.mlp.up_proj.weight': 43.36025619506836, 'model.layers.18.mlp.down_proj.weight': 146.2655487060547, 'model.layers.18.input_layernorm.weight': 0.24897515773773193, 'model.layers.18.post_attention_layernorm.weight': 0.34278666973114014, 'model.layers.19.self_attn.q_proj.weight': 2.163707733154297, 'model.layers.19.self_attn.k_proj.weight': 1.1855230331420898, 'model.layers.19.self_attn.v_proj.weight': 39.53768539428711, 'model.layers.19.self_attn.o_proj.weight': 37.54239273071289, 'model.layers.19.mlp.gate_proj.weight': 41.20253372192383, 'model.layers.19.mlp.up_proj.weight': 164.97491455078125, 'model.layers.19.mlp.down_proj.weight': 67.04237365722656, 'model.layers.19.input_layernorm.weight': 6.374223709106445, 'model.layers.19.post_attention_layernorm.weight': 5.249032020568848, 'model.layers.20.self_attn.q_proj.weight': 38.78921890258789, 'model.layers.20.self_attn.k_proj.weight': 8.54646110534668, 'model.layers.20.self_attn.v_proj.weight': 97.12510681152344, 'model.layers.20.self_attn.o_proj.weight': 23.358356475830078, 'model.layers.20.mlp.gate_proj.weight': 83.11161041259766, 'model.layers.20.mlp.up_proj.weight': 36.83805847167969, 'model.layers.20.mlp.down_proj.weight': 46.52040100097656, 'model.layers.20.input_layernorm.weight': 8.389382362365723, 'model.layers.20.post_attention_layernorm.weight': 3.3786702156066895, 'model.layers.21.self_attn.q_proj.weight': 25.657180786132812, 'model.layers.21.self_attn.k_proj.weight': 22.558643341064453, 'model.layers.21.self_attn.v_proj.weight': 95.27780151367188, 'model.layers.21.self_attn.o_proj.weight': 2.5621509552001953, 'model.layers.21.mlp.gate_proj.weight': 33.0644416809082, 
'model.layers.21.mlp.up_proj.weight': 71.85498809814453, 'model.layers.21.mlp.down_proj.weight': 75.43953704833984, 'model.layers.21.input_layernorm.weight': 2.32242751121521, 'model.layers.21.post_attention_layernorm.weight': 5.121230125427246, 'model.layers.22.self_attn.q_proj.weight': 28.28116226196289, 'model.layers.22.self_attn.k_proj.weight': 45.24074935913086, 'model.layers.22.self_attn.v_proj.weight': 32.30644989013672, 'model.layers.22.self_attn.o_proj.weight': 27.296077728271484, 'model.layers.22.mlp.gate_proj.weight': 81.89623260498047, 'model.layers.22.mlp.up_proj.weight': 49.941566467285156, 'model.layers.22.mlp.down_proj.weight': 59.70683670043945, 'model.layers.22.input_layernorm.weight': 3.4902665615081787, 'model.layers.22.post_attention_layernorm.weight': 2.934204578399658, 'model.layers.23.self_attn.q_proj.weight': 2.006079912185669, 'model.layers.23.self_attn.k_proj.weight': 14.836212158203125, 'model.layers.23.self_attn.v_proj.weight': 29.880844116210938, 'model.layers.23.self_attn.o_proj.weight': 61.289520263671875, 'model.layers.23.mlp.gate_proj.weight': 84.7732925415039, 'model.layers.23.mlp.up_proj.weight': 12.803875923156738, 'model.layers.23.mlp.down_proj.weight': 48.94889831542969, 'model.layers.23.input_layernorm.weight': 5.6861114501953125, 'model.layers.23.post_attention_layernorm.weight': 1.9145877361297607, 'model.layers.24.self_attn.q_proj.weight': 19.931251525878906, 'model.layers.24.self_attn.k_proj.weight': 18.515501022338867, 'model.layers.24.self_attn.v_proj.weight': 0.6416511535644531, 'model.layers.24.self_attn.o_proj.weight': 13.825777053833008, 'model.layers.24.mlp.gate_proj.weight': 46.91920471191406, 'model.layers.24.mlp.up_proj.weight': 49.85988235473633, 'model.layers.24.mlp.down_proj.weight': 34.795684814453125, 'model.layers.24.input_layernorm.weight': 3.822614908218384, 'model.layers.24.post_attention_layernorm.weight': 0.847561240196228, 'model.layers.25.self_attn.q_proj.weight': 18.419729232788086, 
'model.layers.25.self_attn.k_proj.weight': 12.948476791381836, 'model.layers.25.self_attn.v_proj.weight': 90.69570922851562, 'model.layers.25.self_attn.o_proj.weight': 4.8211283683776855, 'model.layers.25.mlp.gate_proj.weight': 86.09648895263672, 'model.layers.25.mlp.up_proj.weight': 14.497987747192383, 'model.layers.25.mlp.down_proj.weight': 85.0718994140625, 'model.layers.25.input_layernorm.weight': 3.1521644592285156, 'model.layers.25.post_attention_layernorm.weight': 0.8257291913032532, 'model.layers.26.self_attn.q_proj.weight': 15.60930061340332, 'model.layers.26.self_attn.k_proj.weight': 60.62446594238281, 'model.layers.26.self_attn.v_proj.weight': 3.335906982421875, 'model.layers.26.self_attn.o_proj.weight': 70.5920181274414, 'model.layers.26.mlp.gate_proj.weight': 6.244541168212891, 'model.layers.26.mlp.up_proj.weight': 23.825096130371094, 'model.layers.26.mlp.down_proj.weight': 66.04875946044922, 'model.layers.26.input_layernorm.weight': 1.7099910974502563, 'model.layers.26.post_attention_layernorm.weight': 2.733952045440674, 'model.layers.27.self_attn.q_proj.weight': 27.89179801940918, 'model.layers.27.self_attn.k_proj.weight': 7.037299633026123, 'model.layers.27.self_attn.v_proj.weight': 53.303192138671875, 'model.layers.27.self_attn.o_proj.weight': 31.342844009399414, 'model.layers.27.mlp.gate_proj.weight': 4.575723171234131, 'model.layers.27.mlp.up_proj.weight': 32.228424072265625, 'model.layers.27.mlp.down_proj.weight': 98.73921203613281, 'model.layers.27.input_layernorm.weight': 4.521480560302734, 'model.layers.27.post_attention_layernorm.weight': 4.7895827293396, 'model.layers.28.self_attn.q_proj.weight': 5.490924835205078, 'model.layers.28.self_attn.k_proj.weight': 6.446569919586182, 'model.layers.28.self_attn.v_proj.weight': 36.667449951171875, 'model.layers.28.self_attn.o_proj.weight': 9.744807243347168, 'model.layers.28.mlp.gate_proj.weight': 52.686004638671875, 'model.layers.28.mlp.up_proj.weight': 18.47812271118164, 
'model.layers.28.mlp.down_proj.weight': 0.10639667510986328, 'model.layers.28.input_layernorm.weight': 5.1784515380859375, 'model.layers.28.post_attention_layernorm.weight': 0.8240885138511658, 'model.layers.29.self_attn.q_proj.weight': 10.2254638671875, 'model.layers.29.self_attn.k_proj.weight': 14.042658805847168, 'model.layers.29.self_attn.v_proj.weight': 29.998687744140625, 'model.layers.29.self_attn.o_proj.weight': 6.178681373596191, 'model.layers.29.mlp.gate_proj.weight': 14.618688583374023, 'model.layers.29.mlp.up_proj.weight': 24.72657012939453, 'model.layers.29.mlp.down_proj.weight': 25.498863220214844, 'model.layers.29.input_layernorm.weight': 6.801156520843506, 'model.layers.29.post_attention_layernorm.weight': 16.827030181884766, 'model.layers.30.self_attn.q_proj.weight': 0.8939738273620605, 'model.layers.30.self_attn.k_proj.weight': 24.32806396484375, 'model.layers.30.self_attn.v_proj.weight': 4.347783088684082, 'model.layers.30.self_attn.o_proj.weight': 31.399599075317383, 'model.layers.30.mlp.gate_proj.weight': 44.27931594848633, 'model.layers.30.mlp.up_proj.weight': 153.48779296875, 'model.layers.30.mlp.down_proj.weight': 480, 'model.layers.30.input_layernorm.weight': 12.225543022155762, 'model.layers.30.post_attention_layernorm.weight': 14.477718353271484, 'model.layers.31.self_attn.q_proj.weight': 6.388827800750732, 'model.layers.31.self_attn.k_proj.weight': 8.219558715820312, 'model.layers.31.self_attn.v_proj.weight': 14.031820297241211, 'model.layers.31.self_attn.o_proj.weight': 28.330585479736328, 'model.layers.31.mlp.gate_proj.weight': 30.65607452392578, 'model.layers.31.mlp.up_proj.weight': 215.90744018554688, 'model.layers.31.mlp.down_proj.weight': 460, 'model.layers.31.input_layernorm.weight': 3.5382285118103027, 'model.layers.31.post_attention_layernorm.weight': 8.597623825073242, 'model.norm.weight': 7.943634986877441, 'lm_head.weight': 5670.7392578125}\n",
    "# Layer-level sensitivity: sensitivity[N] is the sum of the Hessian traces of every\n",
    "# entry whose key is under model.layers.N (32 slots, one per layer index 0..31).\n",
    "sensitivity = [0 for _ in range(32)]\n",
    "for name, trace in hessian_trace.items():\n",
    "    if not name.startswith(\"model.layers\"):\n",
    "        continue  # keys outside model.layers.* are not attributed to any layer\n",
    "    layer = int(name.split('.')[2])  # third dotted field of the key is the layer number\n",
    "    sensitivity[layer] = sensitivity[layer] + trace\n",
    "print(sensitivity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e49d2f02-03db-494c-8ce6-ef5b08524f09",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.10639667510986328, 0.24897515773773193, 0.34278666973114014, 0.6416511535644531, 0.7781143188476562, 0.8240885138511658, 0.8257291913032532, 0.847561240196228, 0.8939738273620605, 1.0987911224365234, 1.1855230331420898, 1.365396499633789, 1.7099910974502563, 1.716322898864746, 1.8457183837890625, 1.9145877361297607, 2.006079912185669, 2.163707733154297, 2.32242751121521, 2.459056854248047, 2.4890124797821045, 2.5621509552001953, 2.663360118865967, 2.733952045440674, 2.8896121978759766, 2.934204578399658, 2.962164878845215, 3.1521644592285156, 3.2302589416503906, 3.335906982421875, 3.3786702156066895, 3.4902665615081787, 3.5382285118103027, 3.7850570678710938, 3.822614908218384, 4.09343147277832, 4.1177802085876465, 4.347783088684082, 4.492853164672852, 4.521480560302734, 4.535086154937744, 4.575723171234131, 4.7895827293396, 4.8211283683776855, 4.927809715270996, 5.04892635345459, 5.121230125427246, 5.1784515380859375, 5.225710391998291, 5.249032020568848, 5.490924835205078, 5.6861114501953125, 5.744742393493652, 6.044079303741455, 6.178681373596191, 6.244541168212891, 6.374223709106445, 6.388827800750732, 6.446569919586182, 6.470224380493164, 6.750141143798828, 6.801156520843506, 7.037299633026123, 7.629405975341797, 8.219558715820312, 8.389382362365723, 8.420675277709961, 8.54646110534668, 8.597623825073242, 8.740921020507812, 9.033424377441406, 9.113536834716797, 9.309587478637695, 9.565960884094238, 9.71144962310791, 9.744807243347168, 10.2254638671875, 10.268321990966797, 10.28453254699707, 10.744429588317871, 12.225543022155762, 12.803875923156738, 12.948476791381836, 13.323066711425781, 13.500053405761719, 13.825777053833008, 14.009176254272461, 14.031820297241211, 14.042658805847168, 14.43136978149414, 14.477718353271484, 14.497987747192383, 14.618688583374023, 14.836212158203125, 15.088008880615234, 15.60930061340332, 15.849406242370605, 15.893417358398438, 16.319766998291016, 16.357452392578125, 16.827030181884766, 17.056533813476562, 
17.881431579589844, 18.419729232788086, 18.47812271118164, 18.515501022338867, 18.64617156982422, 19.931251525878906, 21.421262741088867, 22.558643341064453, 22.63298988342285, 22.674442291259766, 23.10235023498535, 23.358356475830078, 23.825096130371094, 24.32806396484375, 24.68994140625, 24.72657012939453, 25.022708892822266, 25.498863220214844, 25.657180786132812, 26.27182388305664, 27.296077728271484, 27.68348503112793, 27.89179801940918, 28.28116226196289, 28.330585479736328, 28.6591796875, 29.880844116210938, 29.998687744140625, 30.65607452392578, 30.856658935546875, 30.87592315673828, 31.342844009399414, 31.399599075317383, 32.228424072265625, 32.30644989013672, 32.62162399291992, 32.866249084472656, 33.0644416809082, 33.5388298034668, 34.795684814453125, 34.95982360839844, 36.10131072998047, 36.241607666015625, 36.667449951171875, 36.83805847167969, 36.995384216308594, 37.11637878417969, 37.54239273071289, 37.674251556396484, 37.74858474731445, 38.78921890258789, 39.01860809326172, 39.53768539428711, 40.54330062866211, 41.20253372192383, 41.355377197265625, 43.36025619506836, 44.20230484008789, 44.27931594848633, 45.24074935913086, 46.52040100097656, 46.91920471191406, 47.576271057128906, 48.50048065185547, 48.817787170410156, 48.94889831542969, 49.34397506713867, 49.38530349731445, 49.85988235473633, 49.941566467285156, 50.72489929199219, 52.20471954345703, 52.686004638671875, 53.303192138671875, 53.77529525756836, 53.91792297363281, 54.01586151123047, 54.11162567138672, 54.345890045166016, 54.40440368652344, 54.579917907714844, 56.81590270996094, 57.606201171875, 58.77898025512695, 58.874732971191406, 59.45077896118164, 59.70683670043945, 60.62446594238281, 61.289520263671875, 62.91210174560547, 63.99528884887695, 66.04875946044922, 67.04237365722656, 67.09822845458984, 67.72550964355469, 69.13533782958984, 70.5920181274414, 71.73147583007812, 71.85498809814453, 72.61431884765625, 75.43953704833984, 80.12242126464844, 81.28451538085938, 81.89623260498047, 
83.11161041259766, 84.40615844726562, 84.7732925415039, 85.0718994140625, 86.09648895263672, 87.85879516601562, 89.73792266845703, 90.13648986816406, 90.69570922851562, 91.70826721191406, 93.446044921875, 93.62474060058594, 95.27780151367188, 97.12510681152344, 98.73921203613281, 99.5544662475586, 101.28925323486328, 103.09130096435547, 103.61449432373047, 103.98562622070312, 104.68980407714844, 104.9714584350586, 107.02897644042969, 109.19419860839844, 110.5497055053711, 111.20803833007812, 111.54994201660156, 112.63178253173828, 116.80323028564453, 117.87711334228516, 120.51625061035156, 121.18950653076172, 123.28805541992188, 126.78178405761719, 127.69908142089844, 131.64105224609375, 140.7686767578125, 146.2655487060547, 147.3588104248047, 149.952880859375, 153.43853759765625, 153.48779296875, 155.21942138671875, 155.2868194580078, 164.97491455078125, 165.8643341064453, 166.62733459472656, 178.96304321289062, 181.39236450195312, 181.9348907470703, 192.35586547851562, 192.45782470703125, 194.10565185546875, 206.39627075195312, 215.90744018554688, 222.95887756347656, 228.10691833496094, 229.63778686523438, 245.79966735839844, 252.79269409179688, 254.81936645507812, 254.91937255859375, 260.1705322265625, 261.23052978515625, 268.5078430175781, 271.67608642578125, 278.3873596191406, 289.9363098144531, 303.13995361328125, 323.1715393066406, 341.9588317871094, 347.4410095214844, 348.9795837402344, 414.0716857910156, 424.47705078125, 427.9616394042969, 440, 460, 480, 500, 520, 540] 288\n"
     ]
    }
   ],
   "source": [
    "## LLaMA2-7B weight-level\n",
    "hessian_trace = {'model.embed_tokens.weight': 465.8458251953125, 'model.layers.0.self_attn.q_proj.weight': 4.1177802085876465, 'model.layers.0.self_attn.k_proj.weight': 18.64617156982422, 'model.layers.0.self_attn.v_proj.weight': 155.2868194580078, 'model.layers.0.self_attn.o_proj.weight': 289.9363098144531, 'model.layers.0.mlp.gate_proj.weight': 58.77898025512695, 'model.layers.0.mlp.up_proj.weight': 103.98562622070312, 'model.layers.0.mlp.down_proj.weight': 303.13995361328125, 'model.layers.0.input_layernorm.weight': 27.68348503112793, 'model.layers.0.post_attention_layernorm.weight': 49.38530349731445, 'model.layers.1.self_attn.q_proj.weight': 32.866249084472656, 'model.layers.1.self_attn.k_proj.weight': 39.01860809326172, 'model.layers.1.self_attn.v_proj.weight': 500, 'model.layers.1.self_attn.o_proj.weight': 127.69908142089844, 'model.layers.1.mlp.gate_proj.weight': 181.39236450195312, 'model.layers.1.mlp.up_proj.weight': 1.8457183837890625, 'model.layers.1.mlp.down_proj.weight': 540, 'model.layers.1.input_layernorm.weight': 13.500053405761719, 'model.layers.1.post_attention_layernorm.weight': 6.044079303741455, 'model.layers.2.self_attn.q_proj.weight': 67.72550964355469, 'model.layers.2.self_attn.k_proj.weight': 116.80323028564453, 'model.layers.2.self_attn.v_proj.weight': 15.893417358398438, 'model.layers.2.self_attn.o_proj.weight': 37.74858474731445, 'model.layers.2.mlp.gate_proj.weight': 131.64105224609375, 'model.layers.2.mlp.up_proj.weight': 123.28805541992188, 'model.layers.2.mlp.down_proj.weight': 53.77529525756836, 'model.layers.2.input_layernorm.weight': 3.2302589416503906, 'model.layers.2.post_attention_layernorm.weight': 126.78178405761719, 'model.layers.3.self_attn.q_proj.weight': 67.09822845458984, 'model.layers.3.self_attn.k_proj.weight': 69.13533782958984, 'model.layers.3.self_attn.v_proj.weight': 181.9348907470703, 'model.layers.3.self_attn.o_proj.weight': 228.10691833496094, 'model.layers.3.mlp.gate_proj.weight': 49.34397506713867, 
'model.layers.3.mlp.up_proj.weight': 278.3873596191406, 'model.layers.3.mlp.down_proj.weight': 229.63778686523438, 'model.layers.3.input_layernorm.weight': 50.72489929199219, 'model.layers.3.post_attention_layernorm.weight': 32.62162399291992, 'model.layers.4.self_attn.q_proj.weight': 104.9714584350586, 'model.layers.4.self_attn.k_proj.weight': 84.40615844726562, 'model.layers.4.self_attn.v_proj.weight': 520, 'model.layers.4.self_attn.o_proj.weight': 268.5078430175781, 'model.layers.4.mlp.gate_proj.weight': 245.79966735839844, 'model.layers.4.mlp.up_proj.weight': 348.9795837402344, 'model.layers.4.mlp.down_proj.weight': 424.47705078125, 'model.layers.4.input_layernorm.weight': 101.28925323486328, 'model.layers.4.post_attention_layernorm.weight': 15.088008880615234, 'model.layers.5.self_attn.q_proj.weight': 103.09130096435547, 'model.layers.5.self_attn.k_proj.weight': 81.28451538085938, 'model.layers.5.self_attn.v_proj.weight': 28.6591796875, 'model.layers.5.self_attn.o_proj.weight': 155.21942138671875, 'model.layers.5.mlp.gate_proj.weight': 140.7686767578125, 'model.layers.5.mlp.up_proj.weight': 166.62733459472656, 'model.layers.5.mlp.down_proj.weight': 347.4410095214844, 'model.layers.5.input_layernorm.weight': 44.20230484008789, 'model.layers.5.post_attention_layernorm.weight': 1.365396499633789, 'model.layers.6.self_attn.q_proj.weight': 22.674442291259766, 'model.layers.6.self_attn.k_proj.weight': 117.87711334228516, 'model.layers.6.self_attn.v_proj.weight': 341.9588317871094, 'model.layers.6.self_attn.o_proj.weight': 271.67608642578125, 'model.layers.6.mlp.gate_proj.weight': 109.19419860839844, 'model.layers.6.mlp.up_proj.weight': 261.23052978515625, 'model.layers.6.mlp.down_proj.weight': 58.874732971191406, 'model.layers.6.input_layernorm.weight': 36.10131072998047, 'model.layers.6.post_attention_layernorm.weight': 2.459056854248047, 'model.layers.7.self_attn.q_proj.weight': 47.576271057128906, 'model.layers.7.self_attn.k_proj.weight': 165.8643341064453, 
'model.layers.7.self_attn.v_proj.weight': 8.740921020507812, 'model.layers.7.self_attn.o_proj.weight': 192.35586547851562, 'model.layers.7.mlp.gate_proj.weight': 153.43853759765625, 'model.layers.7.mlp.up_proj.weight': 254.81936645507812, 'model.layers.7.mlp.down_proj.weight': 427.9616394042969, 'model.layers.7.input_layernorm.weight': 2.663360118865967, 'model.layers.7.post_attention_layernorm.weight': 1.0987911224365234, 'model.layers.8.self_attn.q_proj.weight': 37.674251556396484, 'model.layers.8.self_attn.k_proj.weight': 107.02897644042969, 'model.layers.8.self_attn.v_proj.weight': 194.10565185546875, 'model.layers.8.self_attn.o_proj.weight': 414.0716857910156, 'model.layers.8.mlp.gate_proj.weight': 121.18950653076172, 'model.layers.8.mlp.up_proj.weight': 323.1715393066406, 'model.layers.8.mlp.down_proj.weight': 23.10235023498535, 'model.layers.8.input_layernorm.weight': 25.022708892822266, 'model.layers.8.post_attention_layernorm.weight': 21.421262741088867, 'model.layers.9.self_attn.q_proj.weight': 26.27182388305664, 'model.layers.9.self_attn.k_proj.weight': 91.70826721191406, 'model.layers.9.self_attn.v_proj.weight': 192.45782470703125, 'model.layers.9.self_attn.o_proj.weight': 206.39627075195312, 'model.layers.9.mlp.gate_proj.weight': 53.91792297363281, 'model.layers.9.mlp.up_proj.weight': 260.1705322265625, 'model.layers.9.mlp.down_proj.weight': 178.96304321289062, 'model.layers.9.input_layernorm.weight': 16.319766998291016, 'model.layers.9.post_attention_layernorm.weight': 6.470224380493164, 'model.layers.10.self_attn.q_proj.weight': 15.849406242370605, 'model.layers.10.self_attn.k_proj.weight': 111.20803833007812, 'model.layers.10.self_attn.v_proj.weight': 440, 'model.layers.10.self_attn.o_proj.weight': 34.95982360839844, 'model.layers.10.mlp.gate_proj.weight': 62.91210174560547, 'model.layers.10.mlp.up_proj.weight': 252.79269409179688, 'model.layers.10.mlp.down_proj.weight': 0.7781143188476562, 'model.layers.10.input_layernorm.weight': 4.09343147277832, 
'model.layers.10.post_attention_layernorm.weight': 10.28453254699707, 'model.layers.11.self_attn.q_proj.weight': 104.68980407714844, 'model.layers.11.self_attn.k_proj.weight': 80.12242126464844, 'model.layers.11.self_attn.v_proj.weight': 254.91937255859375, 'model.layers.11.self_attn.o_proj.weight': 149.952880859375, 'model.layers.11.mlp.gate_proj.weight': 16.357452392578125, 'model.layers.11.mlp.up_proj.weight': 111.54994201660156, 'model.layers.11.mlp.down_proj.weight': 2.8896121978759766, 'model.layers.11.input_layernorm.weight': 9.309587478637695, 'model.layers.11.post_attention_layernorm.weight': 14.009176254272461, 'model.layers.12.self_attn.q_proj.weight': 10.744429588317871, 'model.layers.12.self_attn.k_proj.weight': 87.85879516601562, 'model.layers.12.self_attn.v_proj.weight': 222.95887756347656, 'model.layers.12.self_attn.o_proj.weight': 54.11162567138672, 'model.layers.12.mlp.gate_proj.weight': 48.817787170410156, 'model.layers.12.mlp.up_proj.weight': 6.750141143798828, 'model.layers.12.mlp.down_proj.weight': 48.50048065185547, 'model.layers.12.input_layernorm.weight': 4.535086154937744, 'model.layers.12.post_attention_layernorm.weight': 4.492853164672852, 'model.layers.13.self_attn.q_proj.weight': 30.856658935546875, 'model.layers.13.self_attn.k_proj.weight': 56.81590270996094, 'model.layers.13.self_attn.v_proj.weight': 36.241607666015625, 'model.layers.13.self_attn.o_proj.weight': 112.63178253173828, 'model.layers.13.mlp.gate_proj.weight': 2.962164878845215, 'model.layers.13.mlp.up_proj.weight': 54.40440368652344, 'model.layers.13.mlp.down_proj.weight': 41.355377197265625, 'model.layers.13.input_layernorm.weight': 9.033424377441406, 'model.layers.13.post_attention_layernorm.weight': 2.4890124797821045, 'model.layers.14.self_attn.q_proj.weight': 22.63298988342285, 'model.layers.14.self_attn.k_proj.weight': 30.87592315673828, 'model.layers.14.self_attn.v_proj.weight': 99.5544662475586, 'model.layers.14.self_attn.o_proj.weight': 103.61449432373047, 
'model.layers.14.mlp.gate_proj.weight': 9.113536834716797, 'model.layers.14.mlp.up_proj.weight': 40.54330062866211, 'model.layers.14.mlp.down_proj.weight': 10.268321990966797, 'model.layers.14.input_layernorm.weight': 8.420675277709961, 'model.layers.14.post_attention_layernorm.weight': 9.71144962310791, 'model.layers.15.self_attn.q_proj.weight': 14.43136978149414, 'model.layers.15.self_attn.k_proj.weight': 52.20471954345703, 'model.layers.15.self_attn.v_proj.weight': 93.446044921875, 'model.layers.15.self_attn.o_proj.weight': 147.3588104248047, 'model.layers.15.mlp.gate_proj.weight': 3.7850570678710938, 'model.layers.15.mlp.up_proj.weight': 59.45077896118164, 'model.layers.15.mlp.down_proj.weight': 93.62474060058594, 'model.layers.15.input_layernorm.weight': 5.225710391998291, 'model.layers.15.post_attention_layernorm.weight': 5.04892635345459, 'model.layers.16.self_attn.q_proj.weight': 13.323066711425781, 'model.layers.16.self_attn.k_proj.weight': 17.881431579589844, 'model.layers.16.self_attn.v_proj.weight': 110.5497055053711, 'model.layers.16.self_attn.o_proj.weight': 72.61431884765625, 'model.layers.16.mlp.gate_proj.weight': 24.68994140625, 'model.layers.16.mlp.up_proj.weight': 54.579917907714844, 'model.layers.16.mlp.down_proj.weight': 71.73147583007812, 'model.layers.16.input_layernorm.weight': 4.927809715270996, 'model.layers.16.post_attention_layernorm.weight': 5.744742393493652, 'model.layers.17.self_attn.q_proj.weight': 54.01586151123047, 'model.layers.17.self_attn.k_proj.weight': 36.995384216308594, 'model.layers.17.self_attn.v_proj.weight': 120.51625061035156, 'model.layers.17.self_attn.o_proj.weight': 7.629405975341797, 'model.layers.17.mlp.gate_proj.weight': 57.606201171875, 'model.layers.17.mlp.up_proj.weight': 90.13648986816406, 'model.layers.17.mlp.down_proj.weight': 89.73792266845703, 'model.layers.17.input_layernorm.weight': 9.565960884094238, 'model.layers.17.post_attention_layernorm.weight': 1.716322898864746, 
'model.layers.18.self_attn.q_proj.weight': 37.11637878417969, 'model.layers.18.self_attn.k_proj.weight': 17.056533813476562, 'model.layers.18.self_attn.v_proj.weight': 33.5388298034668, 'model.layers.18.self_attn.o_proj.weight': 54.345890045166016, 'model.layers.18.mlp.gate_proj.weight': 63.99528884887695, 'model.layers.18.mlp.up_proj.weight': 43.36025619506836, 'model.layers.18.mlp.down_proj.weight': 146.2655487060547, 'model.layers.18.input_layernorm.weight': 0.24897515773773193, 'model.layers.18.post_attention_layernorm.weight': 0.34278666973114014, 'model.layers.19.self_attn.q_proj.weight': 2.163707733154297, 'model.layers.19.self_attn.k_proj.weight': 1.1855230331420898, 'model.layers.19.self_attn.v_proj.weight': 39.53768539428711, 'model.layers.19.self_attn.o_proj.weight': 37.54239273071289, 'model.layers.19.mlp.gate_proj.weight': 41.20253372192383, 'model.layers.19.mlp.up_proj.weight': 164.97491455078125, 'model.layers.19.mlp.down_proj.weight': 67.04237365722656, 'model.layers.19.input_layernorm.weight': 6.374223709106445, 'model.layers.19.post_attention_layernorm.weight': 5.249032020568848, 'model.layers.20.self_attn.q_proj.weight': 38.78921890258789, 'model.layers.20.self_attn.k_proj.weight': 8.54646110534668, 'model.layers.20.self_attn.v_proj.weight': 97.12510681152344, 'model.layers.20.self_attn.o_proj.weight': 23.358356475830078, 'model.layers.20.mlp.gate_proj.weight': 83.11161041259766, 'model.layers.20.mlp.up_proj.weight': 36.83805847167969, 'model.layers.20.mlp.down_proj.weight': 46.52040100097656, 'model.layers.20.input_layernorm.weight': 8.389382362365723, 'model.layers.20.post_attention_layernorm.weight': 3.3786702156066895, 'model.layers.21.self_attn.q_proj.weight': 25.657180786132812, 'model.layers.21.self_attn.k_proj.weight': 22.558643341064453, 'model.layers.21.self_attn.v_proj.weight': 95.27780151367188, 'model.layers.21.self_attn.o_proj.weight': 2.5621509552001953, 'model.layers.21.mlp.gate_proj.weight': 33.0644416809082, 
'model.layers.21.mlp.up_proj.weight': 71.85498809814453, 'model.layers.21.mlp.down_proj.weight': 75.43953704833984, 'model.layers.21.input_layernorm.weight': 2.32242751121521, 'model.layers.21.post_attention_layernorm.weight': 5.121230125427246, 'model.layers.22.self_attn.q_proj.weight': 28.28116226196289, 'model.layers.22.self_attn.k_proj.weight': 45.24074935913086, 'model.layers.22.self_attn.v_proj.weight': 32.30644989013672, 'model.layers.22.self_attn.o_proj.weight': 27.296077728271484, 'model.layers.22.mlp.gate_proj.weight': 81.89623260498047, 'model.layers.22.mlp.up_proj.weight': 49.941566467285156, 'model.layers.22.mlp.down_proj.weight': 59.70683670043945, 'model.layers.22.input_layernorm.weight': 3.4902665615081787, 'model.layers.22.post_attention_layernorm.weight': 2.934204578399658, 'model.layers.23.self_attn.q_proj.weight': 2.006079912185669, 'model.layers.23.self_attn.k_proj.weight': 14.836212158203125, 'model.layers.23.self_attn.v_proj.weight': 29.880844116210938, 'model.layers.23.self_attn.o_proj.weight': 61.289520263671875, 'model.layers.23.mlp.gate_proj.weight': 84.7732925415039, 'model.layers.23.mlp.up_proj.weight': 12.803875923156738, 'model.layers.23.mlp.down_proj.weight': 48.94889831542969, 'model.layers.23.input_layernorm.weight': 5.6861114501953125, 'model.layers.23.post_attention_layernorm.weight': 1.9145877361297607, 'model.layers.24.self_attn.q_proj.weight': 19.931251525878906, 'model.layers.24.self_attn.k_proj.weight': 18.515501022338867, 'model.layers.24.self_attn.v_proj.weight': 0.6416511535644531, 'model.layers.24.self_attn.o_proj.weight': 13.825777053833008, 'model.layers.24.mlp.gate_proj.weight': 46.91920471191406, 'model.layers.24.mlp.up_proj.weight': 49.85988235473633, 'model.layers.24.mlp.down_proj.weight': 34.795684814453125, 'model.layers.24.input_layernorm.weight': 3.822614908218384, 'model.layers.24.post_attention_layernorm.weight': 0.847561240196228, 'model.layers.25.self_attn.q_proj.weight': 18.419729232788086, 
'model.layers.25.self_attn.k_proj.weight': 12.948476791381836, 'model.layers.25.self_attn.v_proj.weight': 90.69570922851562, 'model.layers.25.self_attn.o_proj.weight': 4.8211283683776855, 'model.layers.25.mlp.gate_proj.weight': 86.09648895263672, 'model.layers.25.mlp.up_proj.weight': 14.497987747192383, 'model.layers.25.mlp.down_proj.weight': 85.0718994140625, 'model.layers.25.input_layernorm.weight': 3.1521644592285156, 'model.layers.25.post_attention_layernorm.weight': 0.8257291913032532, 'model.layers.26.self_attn.q_proj.weight': 15.60930061340332, 'model.layers.26.self_attn.k_proj.weight': 60.62446594238281, 'model.layers.26.self_attn.v_proj.weight': 3.335906982421875, 'model.layers.26.self_attn.o_proj.weight': 70.5920181274414, 'model.layers.26.mlp.gate_proj.weight': 6.244541168212891, 'model.layers.26.mlp.up_proj.weight': 23.825096130371094, 'model.layers.26.mlp.down_proj.weight': 66.04875946044922, 'model.layers.26.input_layernorm.weight': 1.7099910974502563, 'model.layers.26.post_attention_layernorm.weight': 2.733952045440674, 'model.layers.27.self_attn.q_proj.weight': 27.89179801940918, 'model.layers.27.self_attn.k_proj.weight': 7.037299633026123, 'model.layers.27.self_attn.v_proj.weight': 53.303192138671875, 'model.layers.27.self_attn.o_proj.weight': 31.342844009399414, 'model.layers.27.mlp.gate_proj.weight': 4.575723171234131, 'model.layers.27.mlp.up_proj.weight': 32.228424072265625, 'model.layers.27.mlp.down_proj.weight': 98.73921203613281, 'model.layers.27.input_layernorm.weight': 4.521480560302734, 'model.layers.27.post_attention_layernorm.weight': 4.7895827293396, 'model.layers.28.self_attn.q_proj.weight': 5.490924835205078, 'model.layers.28.self_attn.k_proj.weight': 6.446569919586182, 'model.layers.28.self_attn.v_proj.weight': 36.667449951171875, 'model.layers.28.self_attn.o_proj.weight': 9.744807243347168, 'model.layers.28.mlp.gate_proj.weight': 52.686004638671875, 'model.layers.28.mlp.up_proj.weight': 18.47812271118164, 
'model.layers.28.mlp.down_proj.weight': 0.10639667510986328, 'model.layers.28.input_layernorm.weight': 5.1784515380859375, 'model.layers.28.post_attention_layernorm.weight': 0.8240885138511658, 'model.layers.29.self_attn.q_proj.weight': 10.2254638671875, 'model.layers.29.self_attn.k_proj.weight': 14.042658805847168, 'model.layers.29.self_attn.v_proj.weight': 29.998687744140625, 'model.layers.29.self_attn.o_proj.weight': 6.178681373596191, 'model.layers.29.mlp.gate_proj.weight': 14.618688583374023, 'model.layers.29.mlp.up_proj.weight': 24.72657012939453, 'model.layers.29.mlp.down_proj.weight': 25.498863220214844, 'model.layers.29.input_layernorm.weight': 6.801156520843506, 'model.layers.29.post_attention_layernorm.weight': 16.827030181884766, 'model.layers.30.self_attn.q_proj.weight': 0.8939738273620605, 'model.layers.30.self_attn.k_proj.weight': 24.32806396484375, 'model.layers.30.self_attn.v_proj.weight': 4.347783088684082, 'model.layers.30.self_attn.o_proj.weight': 31.399599075317383, 'model.layers.30.mlp.gate_proj.weight': 44.27931594848633, 'model.layers.30.mlp.up_proj.weight': 153.48779296875, 'model.layers.30.mlp.down_proj.weight': 480, 'model.layers.30.input_layernorm.weight': 12.225543022155762, 'model.layers.30.post_attention_layernorm.weight': 14.477718353271484, 'model.layers.31.self_attn.q_proj.weight': 6.388827800750732, 'model.layers.31.self_attn.k_proj.weight': 8.219558715820312, 'model.layers.31.self_attn.v_proj.weight': 14.031820297241211, 'model.layers.31.self_attn.o_proj.weight': 28.330585479736328, 'model.layers.31.mlp.gate_proj.weight': 30.65607452392578, 'model.layers.31.mlp.up_proj.weight': 215.90744018554688, 'model.layers.31.mlp.down_proj.weight': 460, 'model.layers.31.input_layernorm.weight': 3.5382285118103027, 'model.layers.31.post_attention_layernorm.weight': 8.597623825073242, 'model.norm.weight': 7.943634986877441, 'lm_head.weight': 5670.7392578125}\n",
    "sensitivity = []*32\n",
    "dict = {}\n",
    "clayer = 0\n",
    "total_weight = []\n",
    "for name, trace in hessian_trace.items():\n",
    "   if name.startswith(\"model.layers\"):\n",
    "       layer = int(name.split('.')[2])\n",
    "       if layer<0 and layer>=32:\n",
    "           continue\n",
    "       if clayer < layer:\n",
    "           clayer = layer\n",
    "           sensitivity.append(dict)\n",
    "           dict = {}\n",
    "       subname = \".\".join(name.split('.')[3:])\n",
    "       if subname.endswith(\".weight\"):\n",
    "           dict[subname[:-7]] = trace\n",
    "           total_weight.append(trace)\n",
    "sensitivity.append(dict)\n",
    "#print(sensitivity, len(sensitivity))\n",
    "total_weight = sorted(total_weight)\n",
    "print(sorted(total_weight), len(total_weight))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36fdf7b1-753d-4051-bb8c-c99ebad60a25",
   "metadata": {},
   "outputs": [],
   "source": [
    "hessian_trace = {'model.embed_tokens.weight': 329.64154052734375, 'model.layers.0.self_attn.W_pack.weight': 42.796390533447266, 'model.layers.0.self_attn.o_proj.weight': 103.05613708496094, 'model.layers.0.mlp.gate_proj.weight': 24.81538200378418, 'model.layers.0.mlp.down_proj.weight': 24.132038116455078, 'model.layers.0.mlp.up_proj.weight': 8.758073806762695, 'model.layers.0.input_layernorm.weight': 70.32990264892578, 'model.layers.0.post_attention_layernorm.weight': 5.113643646240234, 'model.layers.1.self_attn.W_pack.weight': 178.6654052734375, 'model.layers.1.self_attn.o_proj.weight': 22.058069229125977, 'model.layers.1.mlp.gate_proj.weight': 51.95944595336914, 'model.layers.1.mlp.down_proj.weight': 29.165515899658203, 'model.layers.1.mlp.up_proj.weight': 29.581310272216797, 'model.layers.1.input_layernorm.weight': 37.945228576660156, 'model.layers.1.post_attention_layernorm.weight': 7.57893180847168, 'model.layers.2.self_attn.W_pack.weight': 13.999284744262695, 'model.layers.2.self_attn.o_proj.weight': 64.07806396484375, 'model.layers.2.mlp.gate_proj.weight': 48.80052947998047, 'model.layers.2.mlp.down_proj.weight': 108.44867706298828, 'model.layers.2.mlp.up_proj.weight': 9.023494720458984, 'model.layers.2.input_layernorm.weight': 57.984458923339844, 'model.layers.2.post_attention_layernorm.weight': 81.6681900024414, 'model.layers.3.self_attn.W_pack.weight': 65.57622528076172, 'model.layers.3.self_attn.o_proj.weight': 7.046623229980469, 'model.layers.3.mlp.gate_proj.weight': 52.700416564941406, 'model.layers.3.mlp.down_proj.weight': 20.68716812133789, 'model.layers.3.mlp.up_proj.weight': 137.10081481933594, 'model.layers.3.input_layernorm.weight': 35.38990783691406, 'model.layers.3.post_attention_layernorm.weight': 4.662907600402832, 'model.layers.4.self_attn.W_pack.weight': 286.4843444824219, 'model.layers.4.self_attn.o_proj.weight': 80.577880859375, 'model.layers.4.mlp.gate_proj.weight': 219.2554168701172, 'model.layers.4.mlp.down_proj.weight': 
135.39764404296875, 'model.layers.4.mlp.up_proj.weight': 15.108564376831055, 'model.layers.4.input_layernorm.weight': 10.707464218139648, 'model.layers.4.post_attention_layernorm.weight': 3.0128369331359863, 'model.layers.5.self_attn.W_pack.weight': 142.3785400390625, 'model.layers.5.self_attn.o_proj.weight': 166.6620635986328, 'model.layers.5.mlp.gate_proj.weight': 60.53947067260742, 'model.layers.5.mlp.down_proj.weight': 113.31942749023438, 'model.layers.5.mlp.up_proj.weight': 72.14067077636719, 'model.layers.5.input_layernorm.weight': 24.324478149414062, 'model.layers.5.post_attention_layernorm.weight': 19.52776336669922, 'model.layers.6.self_attn.W_pack.weight': 312.2221984863281, 'model.layers.6.self_attn.o_proj.weight': 21.91756820678711, 'model.layers.6.mlp.gate_proj.weight': 23.790668487548828, 'model.layers.6.mlp.down_proj.weight': 43.266746520996094, 'model.layers.6.mlp.up_proj.weight': 51.147911071777344, 'model.layers.6.input_layernorm.weight': 10.460563659667969, 'model.layers.6.post_attention_layernorm.weight': 7.471612930297852, 'model.layers.7.self_attn.W_pack.weight': 43.795536041259766, 'model.layers.7.self_attn.o_proj.weight': 114.42286682128906, 'model.layers.7.mlp.gate_proj.weight': 47.93408203125, 'model.layers.7.mlp.down_proj.weight': 250.892822265625, 'model.layers.7.mlp.up_proj.weight': 287.97216796875, 'model.layers.7.input_layernorm.weight': 74.07683563232422, 'model.layers.7.post_attention_layernorm.weight': 14.754398345947266, 'model.layers.8.self_attn.W_pack.weight': 229.9970703125, 'model.layers.8.self_attn.o_proj.weight': 97.09761047363281, 'model.layers.8.mlp.gate_proj.weight': 300.8734436035156, 'model.layers.8.mlp.down_proj.weight': 156.38694763183594, 'model.layers.8.mlp.up_proj.weight': 180.96337890625, 'model.layers.8.input_layernorm.weight': 22.925716400146484, 'model.layers.8.post_attention_layernorm.weight': 26.80377197265625, 'model.layers.9.self_attn.W_pack.weight': 305.01263427734375, 
'model.layers.9.self_attn.o_proj.weight': 265.0890197753906, 'model.layers.9.mlp.gate_proj.weight': 200.70291137695312, 'model.layers.9.mlp.down_proj.weight': 307.24273681640625, 'model.layers.9.mlp.up_proj.weight': 59.81349182128906, 'model.layers.9.input_layernorm.weight': 33.39231491088867, 'model.layers.9.post_attention_layernorm.weight': 17.75054931640625, 'model.layers.10.self_attn.W_pack.weight': 184.53529357910156, 'model.layers.10.self_attn.o_proj.weight': 541.69140625, 'model.layers.10.mlp.gate_proj.weight': 147.36708068847656, 'model.layers.10.mlp.down_proj.weight': 252.9352264404297, 'model.layers.10.mlp.up_proj.weight': 33.421661376953125, 'model.layers.10.input_layernorm.weight': 18.690799713134766, 'model.layers.10.post_attention_layernorm.weight': 12.285993576049805, 'model.layers.11.self_attn.W_pack.weight': 154.85841369628906, 'model.layers.11.self_attn.o_proj.weight': 203.8035888671875, 'model.layers.11.mlp.gate_proj.weight': 391.6720275878906, 'model.layers.11.mlp.down_proj.weight': 317.4286193847656, 'model.layers.11.mlp.up_proj.weight': 113.96981811523438, 'model.layers.11.input_layernorm.weight': 25.63903045654297, 'model.layers.11.post_attention_layernorm.weight': 2.969888687133789, 'model.layers.12.self_attn.W_pack.weight': 26.624656677246094, 'model.layers.12.self_attn.o_proj.weight': 375.6956481933594, 'model.layers.12.mlp.gate_proj.weight': 55.655181884765625, 'model.layers.12.mlp.down_proj.weight': 18.173179626464844, 'model.layers.12.mlp.up_proj.weight': 29.545413970947266, 'model.layers.12.input_layernorm.weight': 15.440571784973145, 'model.layers.12.post_attention_layernorm.weight': 26.800992965698242, 'model.layers.13.self_attn.W_pack.weight': 140.33468627929688, 'model.layers.13.self_attn.o_proj.weight': 7.571754455566406, 'model.layers.13.mlp.gate_proj.weight': 70.99327087402344, 'model.layers.13.mlp.down_proj.weight': 752.7752075195312, 'model.layers.13.mlp.up_proj.weight': 2.7689380645751953, 
'model.layers.13.input_layernorm.weight': 8.201879501342773, 'model.layers.13.post_attention_layernorm.weight': 12.290473937988281, 'model.layers.14.self_attn.W_pack.weight': 131.17242431640625, 'model.layers.14.self_attn.o_proj.weight': 109.42910766601562, 'model.layers.14.mlp.gate_proj.weight': 85.70367431640625, 'model.layers.14.mlp.down_proj.weight': 566.7034301757812, 'model.layers.14.mlp.up_proj.weight': 79.34734344482422, 'model.layers.14.input_layernorm.weight': 6.516969203948975, 'model.layers.14.post_attention_layernorm.weight': 1.6866350173950195, 'model.layers.15.self_attn.W_pack.weight': 26.574310302734375, 'model.layers.15.self_attn.o_proj.weight': 216.2356719970703, 'model.layers.15.mlp.gate_proj.weight': 114.02530670166016, 'model.layers.15.mlp.down_proj.weight': 399.50927734375, 'model.layers.15.mlp.up_proj.weight': 171.49557495117188, 'model.layers.15.input_layernorm.weight': 21.24315643310547, 'model.layers.15.post_attention_layernorm.weight': 20.62063980102539, 'model.layers.16.self_attn.W_pack.weight': 199.70162963867188, 'model.layers.16.self_attn.o_proj.weight': 3.2754592895507812, 'model.layers.16.mlp.gate_proj.weight': 40.83314895629883, 'model.layers.16.mlp.down_proj.weight': 192.13504028320312, 'model.layers.16.mlp.up_proj.weight': 2.7925033569335938, 'model.layers.16.input_layernorm.weight': 35.19383239746094, 'model.layers.16.post_attention_layernorm.weight': 10.788374900817871, 'model.layers.17.self_attn.W_pack.weight': 89.20930480957031, 'model.layers.17.self_attn.o_proj.weight': 383.44110107421875, 'model.layers.17.mlp.gate_proj.weight': 498.843994140625, 'model.layers.17.mlp.down_proj.weight': 27.260643005371094, 'model.layers.17.mlp.up_proj.weight': 161.88375854492188, 'model.layers.17.input_layernorm.weight': 20.749549865722656, 'model.layers.17.post_attention_layernorm.weight': 12.610977172851562, 'model.layers.18.self_attn.W_pack.weight': 572.3245849609375, 'model.layers.18.self_attn.o_proj.weight': 135.09368896484375, 
'model.layers.18.mlp.gate_proj.weight': 98.05675506591797, 'model.layers.18.mlp.down_proj.weight': 197.47091674804688, 'model.layers.18.mlp.up_proj.weight': 76.56133270263672, 'model.layers.18.input_layernorm.weight': 34.516334533691406, 'model.layers.18.post_attention_layernorm.weight': 16.425294876098633, 'model.layers.19.self_attn.W_pack.weight': 454.207275390625, 'model.layers.19.self_attn.o_proj.weight': 259.7581787109375, 'model.layers.19.mlp.gate_proj.weight': 88.37212371826172, 'model.layers.19.mlp.down_proj.weight': 133.8662109375, 'model.layers.19.mlp.up_proj.weight': 111.13916015625, 'model.layers.19.input_layernorm.weight': 20.384254455566406, 'model.layers.19.post_attention_layernorm.weight': 21.431726455688477, 'model.layers.20.self_attn.W_pack.weight': 618.8531494140625, 'model.layers.20.self_attn.o_proj.weight': 115.33079528808594, 'model.layers.20.mlp.gate_proj.weight': 42.299503326416016, 'model.layers.20.mlp.down_proj.weight': 99.01408386230469, 'model.layers.20.mlp.up_proj.weight': 123.78594970703125, 'model.layers.20.input_layernorm.weight': 9.068575859069824, 'model.layers.20.post_attention_layernorm.weight': 12.204352378845215, 'model.layers.21.self_attn.W_pack.weight': 315.5662536621094, 'model.layers.21.self_attn.o_proj.weight': 82.34078979492188, 'model.layers.21.mlp.gate_proj.weight': 277.2019348144531, 'model.layers.21.mlp.down_proj.weight': 322.60748291015625, 'model.layers.21.mlp.up_proj.weight': 46.55487823486328, 'model.layers.21.input_layernorm.weight': 20.893672943115234, 'model.layers.21.post_attention_layernorm.weight': 10.55594253540039, 'model.layers.22.self_attn.W_pack.weight': 399.7720947265625, 'model.layers.22.self_attn.o_proj.weight': 96.72584533691406, 'model.layers.22.mlp.gate_proj.weight': 46.910247802734375, 'model.layers.22.mlp.down_proj.weight': 139.707275390625, 'model.layers.22.mlp.up_proj.weight': 235.64901733398438, 'model.layers.22.input_layernorm.weight': 17.862411499023438, 
'model.layers.22.post_attention_layernorm.weight': 13.19644546508789, 'model.layers.23.self_attn.W_pack.weight': 123.12423706054688, 'model.layers.23.self_attn.o_proj.weight': 0.06105804443359375, 'model.layers.23.mlp.gate_proj.weight': 8.839845657348633, 'model.layers.23.mlp.down_proj.weight': 285.197998046875, 'model.layers.23.mlp.up_proj.weight': 81.29641723632812, 'model.layers.23.input_layernorm.weight': 6.074459075927734, 'model.layers.23.post_attention_layernorm.weight': 12.534507751464844, 'model.layers.24.self_attn.W_pack.weight': 255.33238220214844, 'model.layers.24.self_attn.o_proj.weight': 133.9571533203125, 'model.layers.24.mlp.gate_proj.weight': 83.53457641601562, 'model.layers.24.mlp.down_proj.weight': 27.772518157958984, 'model.layers.24.mlp.up_proj.weight': 41.26900100708008, 'model.layers.24.input_layernorm.weight': 6.756904125213623, 'model.layers.24.post_attention_layernorm.weight': 14.129257202148438, 'model.layers.25.self_attn.W_pack.weight': 103.37714385986328, 'model.layers.25.self_attn.o_proj.weight': 56.4900016784668, 'model.layers.25.mlp.gate_proj.weight': 208.2404022216797, 'model.layers.25.mlp.down_proj.weight': 234.99159240722656, 'model.layers.25.mlp.up_proj.weight': 288.63568115234375, 'model.layers.25.input_layernorm.weight': 1.1440134048461914, 'model.layers.25.post_attention_layernorm.weight': 3.3634839057922363, 'model.layers.26.self_attn.W_pack.weight': 105.59356689453125, 'model.layers.26.self_attn.o_proj.weight': 30.458263397216797, 'model.layers.26.mlp.gate_proj.weight': 74.4517593383789, 'model.layers.26.mlp.down_proj.weight': 164.03488159179688, 'model.layers.26.mlp.up_proj.weight': 69.63771057128906, 'model.layers.26.input_layernorm.weight': 14.483466148376465, 'model.layers.26.post_attention_layernorm.weight': 11.815056800842285, 'model.layers.27.self_attn.W_pack.weight': 187.55075073242188, 'model.layers.27.self_attn.o_proj.weight': 125.0672607421875, 'model.layers.27.mlp.gate_proj.weight': 77.65340423583984, 
'model.layers.27.mlp.down_proj.weight': 9.314472198486328, 'model.layers.27.mlp.up_proj.weight': 162.0220489501953, 'model.layers.27.input_layernorm.weight': 4.740932941436768, 'model.layers.27.post_attention_layernorm.weight': 0.9526500701904297, 'model.layers.28.self_attn.W_pack.weight': 78.34429931640625, 'model.layers.28.self_attn.o_proj.weight': 6.596046447753906, 'model.layers.28.mlp.gate_proj.weight': 22.383790969848633, 'model.layers.28.mlp.down_proj.weight': 131.0739288330078, 'model.layers.28.mlp.up_proj.weight': 40.78053665161133, 'model.layers.28.input_layernorm.weight': 9.127517700195312, 'model.layers.28.post_attention_layernorm.weight': 15.317330360412598, 'model.layers.29.self_attn.W_pack.weight': 120.24341583251953, 'model.layers.29.self_attn.o_proj.weight': 112.29653930664062, 'model.layers.29.mlp.gate_proj.weight': 38.29157257080078, 'model.layers.29.mlp.down_proj.weight': 105.41783142089844, 'model.layers.29.mlp.up_proj.weight': 51.532859802246094, 'model.layers.29.input_layernorm.weight': 17.701610565185547, 'model.layers.29.post_attention_layernorm.weight': 0.9163107872009277, 'model.layers.30.self_attn.W_pack.weight': 11.378253936767578, 'model.layers.30.self_attn.o_proj.weight': 34.50566101074219, 'model.layers.30.mlp.gate_proj.weight': 23.995132446289062, 'model.layers.30.mlp.down_proj.weight': 114.779052734375, 'model.layers.30.mlp.up_proj.weight': 3.9347076416015625, 'model.layers.30.input_layernorm.weight': 5.532497882843018, 'model.layers.30.post_attention_layernorm.weight': 8.656522750854492, 'model.layers.31.self_attn.W_pack.weight': 20.2293701171875, 'model.layers.31.self_attn.o_proj.weight': 26.363689422607422, 'model.layers.31.mlp.gate_proj.weight': 121.09629821777344, 'model.layers.31.mlp.down_proj.weight': 71.51612091064453, 'model.layers.31.mlp.up_proj.weight': 40.007328033447266, 'model.layers.31.input_layernorm.weight': 0.8192113637924194, 'model.layers.31.post_attention_layernorm.weight': 4.652134895324707, 
'model.layers.32.self_attn.W_pack.weight': 49.29255294799805, 'model.layers.32.self_attn.o_proj.weight': 116.1943359375, 'model.layers.32.mlp.gate_proj.weight': 52.72003173828125, 'model.layers.32.mlp.down_proj.weight': 84.19198608398438, 'model.layers.32.mlp.up_proj.weight': 77.00352478027344, 'model.layers.32.input_layernorm.weight': 1.0432825088500977, 'model.layers.32.post_attention_layernorm.weight': 2.8331971168518066, 'model.layers.33.self_attn.W_pack.weight': 79.7201156616211, 'model.layers.33.self_attn.o_proj.weight': 60.495941162109375, 'model.layers.33.mlp.gate_proj.weight': 74.68788146972656, 'model.layers.33.mlp.down_proj.weight': 136.8636016845703, 'model.layers.33.mlp.up_proj.weight': 103.47222900390625, 'model.layers.33.input_layernorm.weight': 15.130853652954102, 'model.layers.33.post_attention_layernorm.weight': 3.2109265327453613, 'model.layers.34.self_attn.W_pack.weight': 28.352020263671875, 'model.layers.34.self_attn.o_proj.weight': 77.9732666015625, 'model.layers.34.mlp.gate_proj.weight': 219.70632934570312, 'model.layers.34.mlp.down_proj.weight': 260.97686767578125, 'model.layers.34.mlp.up_proj.weight': 85.12864685058594, 'model.layers.34.input_layernorm.weight': 0.31279802322387695, 'model.layers.34.post_attention_layernorm.weight': 2.4324021339416504, 'model.layers.35.self_attn.W_pack.weight': 194.6346893310547, 'model.layers.35.self_attn.o_proj.weight': 12.986024856567383, 'model.layers.35.mlp.gate_proj.weight': 48.56150436401367, 'model.layers.35.mlp.down_proj.weight': 152.96719360351562, 'model.layers.35.mlp.up_proj.weight': 26.867788314819336, 'model.layers.35.input_layernorm.weight': 3.910489082336426, 'model.layers.35.post_attention_layernorm.weight': 3.828453540802002, 'model.layers.36.self_attn.W_pack.weight': 174.69598388671875, 'model.layers.36.self_attn.o_proj.weight': 89.51901245117188, 'model.layers.36.mlp.gate_proj.weight': 210.34266662597656, 'model.layers.36.mlp.down_proj.weight': 84.82920837402344, 
'model.layers.36.mlp.up_proj.weight': 152.73324584960938, 'model.layers.36.input_layernorm.weight': 11.520204544067383, 'model.layers.36.post_attention_layernorm.weight': 13.977842330932617, 'model.layers.37.self_attn.W_pack.weight': 52.232139587402344, 'model.layers.37.self_attn.o_proj.weight': 38.33332824707031, 'model.layers.37.mlp.gate_proj.weight': 230.9942626953125, 'model.layers.37.mlp.down_proj.weight': 290.7174072265625, 'model.layers.37.mlp.up_proj.weight': 17.05032730102539, 'model.layers.37.input_layernorm.weight': 4.776288986206055, 'model.layers.37.post_attention_layernorm.weight': 3.9278564453125, 'model.layers.38.self_attn.W_pack.weight': 163.54708862304688, 'model.layers.38.self_attn.o_proj.weight': 52.380855560302734, 'model.layers.38.mlp.gate_proj.weight': 15.507740020751953, 'model.layers.38.mlp.down_proj.weight': 188.3494873046875, 'model.layers.38.mlp.up_proj.weight': 5.84941291809082, 'model.layers.38.input_layernorm.weight': 10.675273895263672, 'model.layers.38.post_attention_layernorm.weight': 4.87885856628418, 'model.layers.39.self_attn.W_pack.weight': 7.286237716674805, 'model.layers.39.self_attn.o_proj.weight': 18.547489166259766, 'model.layers.39.mlp.gate_proj.weight': 34.71479415893555, 'model.layers.39.mlp.down_proj.weight': 151.24029541015625, 'model.layers.39.mlp.up_proj.weight': 100.97728729248047, 'model.layers.39.input_layernorm.weight': 4.233889102935791, 'model.layers.39.post_attention_layernorm.weight': 0.3445558547973633, 'model.norm.weight': 0.5114564895629883, 'lm_head.weight': 4199.0283203125}\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
